Merged

43 commits (the diff below shows the changes from 2 of them)
36092aa  address clang-tidy lints (Feb 15, 2025)
ef9b91a  tool-call: massive refactoring (Feb 15, 2025)
2f683f0  rm minja dep from util & common (Feb 15, 2025)
7a04ebc  move minja to common/minja (Feb 15, 2025)
ece941b  Update utils.hpp (Feb 15, 2025)
aa09a3c  add common_chat_tool (Feb 15, 2025)
7ae7560  force utf8 encoding in get_chat_template (Feb 15, 2025)
646528a  fix json tools parsing (Feb 16, 2025)
db2b44e  add json tools / messages parsing helpers to common (Feb 16, 2025)
c7c8907  fix common_chat_msgs_parse_oaicompat (Feb 16, 2025)
5f17156  concat multipart content in legacy template path (Feb 16, 2025)
ee9b9d6  add name & tool_call_id to common_chat_msg (Feb 16, 2025)
07f0ad0  Update test-chat.cpp (Feb 16, 2025)
1acda5f  test & fix json<->msg conversions (Feb 16, 2025)
a58e1fc  fix typo (Feb 16, 2025)
103c840  fix content part string concat in legacy template branch (Feb 16, 2025)
c154c02  test tools json conversions (Feb 16, 2025)
3d41f1b  test content parts in test-chat (Feb 16, 2025)
59c8059  fix clang-tidy lints in [test-]chat.* (Feb 16, 2025)
1847cae  fix deepseek r1 slow test (no longer <think> opening w/ new template) (Feb 16, 2025)
8462a51  fix lints in test-chat-template.cpp (Feb 16, 2025)
80c432b  tweak test_calc_result expectations (Feb 16, 2025)
42b29e1  fix double bos/eos jinja avoidance hack (was preventing inner bos/eos… (Feb 16, 2025)
ce4ccf0  add common_chat_templates_source + rehab server template logs (Feb 16, 2025)
cb31f08  fix msg lints (Feb 16, 2025)
76f5d27  tool-call: allow empty tools w/ auto + grammar (Feb 16, 2025)
34e4e22  fix & test grammar & json_schema w/ & w/o --jinja (Feb 16, 2025)
1c6168b  Update test-chat-template.cpp (Feb 16, 2025)
ae6b870  test & fix array message.content (Feb 16, 2025)
1421037  fix links to prepare merge (Feb 16, 2025)
d95a17c  Merge remote-tracking branch 'origin/master' into chat-cleanups (Feb 16, 2025)
5a5ed7b  fix merge (Feb 16, 2025)
dd5ef85  rm trailing spaces (Feb 16, 2025)
2f2f0fa  Add missing <optional> include to chat.cpp (Feb 16, 2025)
a58b9e5  tiny fix: somehow llama_token being defined in an extern c makes it l… (Feb 16, 2025)
f999ff5  alternative fix for gcc c vs. c++ weirdness (Feb 16, 2025)
55a7614  add missing <regex> include to test-chat-template (Feb 16, 2025)
9d62f62  Update chat.hpp (Feb 16, 2025)
da0982a  have common_chat_templates_init return a unique_ptr (Feb 17, 2025)
7ddb454  chat.{hpp -> h} (Feb 17, 2025)
d2969b8  build common_chat_templates_ptr earlier (Feb 17, 2025)
fd2b8e1  use deleter functor for common_chat_templates_ptr (Feb 17, 2025)
9a85439  Merge remote-tracking branch 'origin/master' into chat-cleanups (Feb 18, 2025)
2 changes: 1 addition & 1 deletion Makefile
@@ -1364,7 +1364,7 @@ llama-server: \
examples/server/index.html.hpp \
examples/server/loading.html.hpp \
common/chat.cpp \
-common/chat.hpp \
+common/chat.h \
common/chat-template.hpp \
common/json.hpp \
common/minja.hpp \
2 changes: 1 addition & 1 deletion common/CMakeLists.txt
@@ -57,7 +57,7 @@ add_library(${TARGET} STATIC
arg.h
base64.hpp
chat.cpp
-chat.hpp
+chat.h
common.cpp
common.h
console.cpp
2 changes: 1 addition & 1 deletion common/arg.cpp
@@ -2,7 +2,7 @@

#include "log.h"
#include "sampling.h"
-#include "chat.hpp"
+#include "chat.h"

#include <algorithm>
#include <climits>
10 changes: 5 additions & 5 deletions common/chat.cpp
@@ -1,4 +1,4 @@
-#include "chat.hpp"
+#include "chat.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "minja/chat-template.hpp"
@@ -269,12 +269,12 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
msg.role = "user";
msg.content = "test";

-    auto * tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
+    auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);

common_chat_templates_inputs inputs;
inputs.messages = {msg};

-    common_chat_templates_apply(tmpls, inputs);
+    common_chat_templates_apply(tmpls.get(), inputs);
return true;
} catch (const std::exception & e) {
LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
@@ -362,7 +362,7 @@ const char * common_chat_templates_source(const struct common_chat_templates * t
return tmpls->template_default->source().c_str();
}

-struct common_chat_templates * common_chat_templates_init(
+common_chat_templates_ptr common_chat_templates_init(
const struct llama_model * model,
const std::string & chat_template_override,
const std::string & bos_token_override,
@@ -426,7 +426,7 @@ struct common_chat_templates * common_chat_templates_init(
LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, e.what());
}
}
-    return tmpls;
+    return {tmpls, common_chat_templates_free};
}

std::string common_chat_format_name(common_chat_format format) {
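Note on the `return {tmpls, common_chat_templates_free};` line above: it works because `common_chat_templates_ptr` is a `std::unique_ptr` whose deleter type is a function pointer, so the raw pointer and its deleter are handed back together. A minimal self-contained sketch of the same pattern, with illustrative names (`widget`, `widget_free`) standing in for the library types:

```cpp
#include <cstdio>
#include <memory>

struct widget { int id; };

// Stand-in for a C-style destructor like common_chat_templates_free.
static void widget_free(widget * w) {
    std::printf("freeing widget %d\n", w->id);
    delete w;
}

// unique_ptr with a function-pointer deleter, mirroring common_chat_templates_ptr.
typedef std::unique_ptr<widget, decltype(&widget_free)> widget_ptr;

static widget_ptr widget_init(int id) {
    auto * w = new widget{id};
    return {w, widget_free};  // same shape as `return {tmpls, common_chat_templates_free};`
}

int main() {
    widget_ptr p = widget_init(42);
    std::printf("id = %d\n", p->id);  // p.get() yields the raw pointer for C-style APIs
    return 0;
}  // widget_free runs here automatically
```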
8 changes: 5 additions & 3 deletions common/chat.hpp → common/chat.h
@@ -85,17 +85,19 @@ struct common_chat_params {
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);

-struct common_chat_templates * common_chat_templates_init(
+
+void common_chat_templates_free(struct common_chat_templates * tmpls);
+typedef std::unique_ptr<struct common_chat_templates, decltype(&common_chat_templates_free)> common_chat_templates_ptr;
+
+common_chat_templates_ptr common_chat_templates_init(
const struct llama_model * model,
const std::string & chat_template_override,
const std::string & bos_token_override = "",
const std::string & eos_token_override = "");

bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
-void common_chat_templates_free(struct common_chat_templates * tmpls);

-typedef std::unique_ptr<struct common_chat_templates, decltype(&common_chat_templates_free)> common_chat_templates_ptr;

struct common_chat_params common_chat_templates_apply(
const struct common_chat_templates * tmpls,
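With `common_chat_templates_init` returning an owning smart pointer, call sites collapse to a single `auto` line, and the raw-pointer APIs are reached through `.get()`. A sketch of the new call pattern, assuming only the declarations shown in the diff above (error handling elided):

```cpp
#include "chat.h"

static void demo(const llama_model * model) {
    // Owning pointer: common_chat_templates_free runs when tmpls leaves scope.
    auto tmpls = common_chat_templates_init(model, /* chat_template_override= */ "");

    common_chat_msg msg;
    msg.role    = "user";
    msg.content = "test";

    common_chat_templates_inputs inputs;
    inputs.messages = {msg};

    // The query/apply functions keep raw-pointer signatures; callers pass .get().
    common_chat_params params = common_chat_templates_apply(tmpls.get(), inputs);
    (void) params;
}
```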
6 changes: 2 additions & 4 deletions examples/main/main.cpp
@@ -4,7 +4,7 @@
#include "log.h"
#include "sampling.h"
#include "llama.h"
-#include "chat.hpp"
+#include "chat.h"

#include <cstdio>
#include <cstring>
@@ -158,9 +158,7 @@ int main(int argc, char ** argv) {
}

const llama_vocab * vocab = llama_model_get_vocab(model);
-    common_chat_templates_ptr chat_templates(
-        common_chat_templates_init(model, params.chat_template),
-        &common_chat_templates_free);
+    auto chat_templates = common_chat_templates_init(model, params.chat_template);

LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);

6 changes: 2 additions & 4 deletions examples/run/run.cpp
@@ -24,7 +24,7 @@
#include <string>
#include <vector>

-#include "chat.hpp"
+#include "chat.h"
#include "common.h"
#include "json.hpp"
#include "linenoise.cpp/linenoise.h"
@@ -1057,9 +1057,7 @@ static int get_user_input(std::string & user_input, const std::string & user) {
static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_jinja) {
int prev_len = 0;
llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
-    common_chat_templates_ptr chat_templates(
-        common_chat_templates_init(llama_data.model.get(), ""),
-        &common_chat_templates_free);
+    auto chat_templates = common_chat_templates_init(llama_data.model.get(), "");
static const bool stdout_a_terminal = is_stdout_a_terminal();
while (true) {
// Get user input
20 changes: 10 additions & 10 deletions examples/server/server.cpp
@@ -1804,7 +1804,9 @@ struct server_context {
// Necessary similarity of prompt for slot selection
float slot_prompt_similarity = 0.0f;

-    struct common_chat_templates * chat_templates = nullptr;
+    common_chat_templates_ptr chat_templates;
+
+    server_context() : chat_templates(nullptr, nullptr) {}

~server_context() {
// Clear any sampling context
@@ -1822,7 +1824,6 @@
}

llama_batch_free(batch);
-        common_chat_templates_free(chat_templates);
}

bool load_model(const common_params & params) {
@@ -1891,10 +1892,9 @@

chat_templates = common_chat_templates_init(model, params_base.chat_template);
try {
-            common_chat_format_example(chat_templates, params.use_jinja);
+            common_chat_format_example(chat_templates.get(), params.use_jinja);
} catch (const std::exception & e) {
SRV_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
-            common_chat_templates_free(chat_templates);
chat_templates = common_chat_templates_init(model, "chatml");
}

@@ -3793,13 +3793,13 @@ int main(int argc, char ** argv) {
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
{ "total_slots", ctx_server.params_base.n_parallel },
{ "model_path", ctx_server.params_base.model },
-            { "chat_template", common_chat_templates_source(ctx_server.chat_templates) },
+            { "chat_template", common_chat_templates_source(ctx_server.chat_templates.get()) },
{ "bos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_bos(ctx_server.vocab), /* special= */ true)},
{ "eos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_eos(ctx_server.vocab), /* special= */ true)},
{ "build_info", build_info },
};
if (ctx_server.params_base.use_jinja) {
-        if (auto tool_use_src = common_chat_templates_source(ctx_server.chat_templates, "tool_use")) {
+        if (auto tool_use_src = common_chat_templates_source(ctx_server.chat_templates.get(), "tool_use")) {
data["chat_template_tool_use"] = tool_use_src;
}
}
Expand Down Expand Up @@ -4036,7 +4036,7 @@ int main(int argc, char ** argv) {
}

auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates.get());

return handle_completions_impl(
SERVER_TASK_TYPE_COMPLETION,
@@ -4049,7 +4049,7 @@ int main(int argc, char ** argv) {
// same with handle_chat_completions, but without inference part
const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates.get());
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
};

@@ -4455,8 +4455,8 @@ int main(int argc, char ** argv) {

// print sample chat example to make it clear which template is used
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
-        common_chat_templates_source(ctx_server.chat_templates),
-        common_chat_format_example(ctx_server.chat_templates, ctx_server.params_base.use_jinja).c_str());
+        common_chat_templates_source(ctx_server.chat_templates.get()),
+        common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str());

ctx_server.queue_tasks.on_new_task([&ctx_server](const server_task & task) {
ctx_server.process_single_task(task);
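One wrinkle visible above: a `std::unique_ptr` whose deleter type is a function pointer has no default constructor, which is why `server_context` gains the explicit `chat_templates(nullptr, nullptr)` initializer (the null deleter is harmless only because the member is reassigned before anything is freed). A tiny sketch of the constraint, with illustrative names:

```cpp
#include <memory>

struct thing { int id; };
static void thing_free(thing * t) { delete t; }

using thing_ptr = std::unique_ptr<thing, decltype(&thing_free)>;

struct holder {
    // `thing_ptr p{};` would not compile: a function-pointer deleter
    // cannot be default-constructed and must be supplied explicitly.
    thing_ptr p;
    holder() : p(nullptr, thing_free) {}  // or (nullptr, nullptr), as server_context does
};

int main() {
    holder h;
    h.p.reset(new thing{7});  // reset keeps the deleter passed at construction
    return 0;
}
```

Commit fd2b8e1 in the list above ("use deleter functor for common_chat_templates_ptr") later swaps the function pointer for a stateless functor, which restores default construction.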
2 changes: 1 addition & 1 deletion examples/server/utils.hpp
@@ -12,7 +12,7 @@
// Change JSON_ASSERT from assert() to GGML_ASSERT:
#define JSON_ASSERT GGML_ASSERT
#include "json.hpp"
-#include "chat.hpp"
+#include "chat.h"

#include <random>
#include <sstream>
8 changes: 4 additions & 4 deletions tests/test-chat-template.cpp
@@ -8,7 +8,7 @@

#include "llama.h"
#include "common.h"
-#include "chat.hpp"
+#include "chat.h"

static std::string normalize_newlines(const std::string & s) {
#ifdef _WIN32
@@ -322,7 +322,7 @@ int main(void) {
}
printf("\n\n=== %s (jinja) ===\n\n", test_case.name.c_str());
try {
-            common_chat_templates_ptr tmpls(common_chat_templates_init(/* model= */ nullptr, test_case.template_str.c_str(), test_case.bos_token, test_case.eos_token), &common_chat_templates_free);
+            auto tmpls = common_chat_templates_init(/* model= */ nullptr, test_case.template_str.c_str(), test_case.bos_token, test_case.eos_token);
common_chat_templates_inputs inputs;
inputs.use_jinja = true;
inputs.messages = messages;
@@ -349,7 +349,7 @@ int main(void) {
auto sys_msg = simple_msg("system", "You are a helpful assistant");

auto fmt_sys = [&](std::string tmpl_str) {
-        common_chat_templates_ptr tmpls(common_chat_templates_init(/* model= */ nullptr, tmpl_str), &common_chat_templates_free);
+        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
printf("fmt_sys(%s) : %s\n", tmpl_str.c_str(), output.c_str());
printf("-------------------------\n");
@@ -376,7 +376,7 @@ int main(void) {
auto new_msg = simple_msg("user", "How are you");

auto fmt_single = [&](const std::string & tmpl_str) {
-        common_chat_templates_ptr tmpls(common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str()), &common_chat_templates_free);
+        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
printf("fmt_single(%s) : %s\n", tmpl_str.c_str(), output.c_str());
printf("-------------------------\n");
4 changes: 2 additions & 2 deletions tests/test-chat.cpp
@@ -10,7 +10,7 @@
#include <json.hpp>
#include <string>

-#include "chat.hpp"
+#include "chat.h"
#include "llama-grammar.h"
#include "unicode.h"

@@ -45,7 +45,7 @@ static std::string read_file(const std::string & path) {
}

static common_chat_templates_ptr read_templates(const std::string & path) {
-    return common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, read_file(path)), &common_chat_templates_free);
+    return common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, read_file(path)));
}

static std::unique_ptr<llama_grammar> build_grammar(const std::string & grammar_str) {
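For completeness, a sketch of the deleter-functor variant that commit fd2b8e1 moves to; names are illustrative and the real definition may differ in detail:

```cpp
#include <memory>

struct thing { int id; };
static void thing_free(thing * t) { delete t; }

// Stateless functor deleter, replacing decltype(&thing_free): the unique_ptr
// becomes default-constructible and stores no per-instance deleter pointer.
struct thing_deleter {
    void operator()(thing * t) const { thing_free(t); }
};
using thing_ptr = std::unique_ptr<thing, thing_deleter>;

struct holder {
    thing_ptr p;  // default-constructible again: no (nullptr, nullptr) ceremony
};

int main() {
    holder h;
    h.p.reset(new thing{7});
    return 0;
}
```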