From d230722d8ed16d6a04360db026db8cbd7deceec0 Mon Sep 17 00:00:00 2001 From: Mason M Date: Wed, 15 Oct 2025 22:16:15 -0300 Subject: [PATCH 01/17] Add partial formatter --- tools/main/main.cpp | 118 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 11 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 498e00e3a5e..b31527cbc9a 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -83,6 +83,102 @@ static void sigint_handler(int signo) { } #endif +class partial_formatter { +public: + enum output_type { + CONTENT, + REASONING, + }; + + struct output { + std::string formatted; + output_type type; + }; + + partial_formatter(const common_chat_syntax & syntax) : syntax_(syntax), had_reasoning_(false) {} + + std::vector operator()(const std::string & accumulated) { + common_chat_msg next = common_chat_parse(accumulated, true, syntax_); + + auto diffs = common_chat_msg_diff::compute_diffs(previous_, next); + std::vector result; + for (const auto & diff : diffs) { + if (!diff.reasoning_content_delta.empty()) { + result.push_back({diff.reasoning_content_delta, REASONING}); + had_reasoning_ = true; + } + if (!diff.content_delta.empty()) { + if (had_reasoning_) { + result.push_back({"\n", REASONING}); + had_reasoning_ = false; + } + result.push_back({diff.content_delta, CONTENT}); + } + } + previous_ = next; + return result; + } + +private: + common_chat_syntax syntax_; + common_chat_msg previous_; + bool had_reasoning_; +}; + +class chat_formatter { +public: + chat_formatter( + std::vector & chat_msgs, + const common_chat_templates_ptr & chat_templates, + const common_params & params) + : chat_msgs_(chat_msgs), + chat_templates_(chat_templates), + params_(params) {} + + std::string operator()(const std::string & role, const std::string & content) { + common_chat_msg new_msg; + new_msg.role = role; + new_msg.content = content; + chat_msgs_.push_back(new_msg); + + common_chat_templates_inputs cinputs; + cinputs.use_jinja = params_.use_jinja; + cinputs.messages = chat_msgs_; + cinputs.add_generation_prompt = (role == "user"); + cinputs.reasoning_format = params_.reasoning_format; + + cinputs.enable_thinking = + params_.use_jinja && params_.reasoning_budget != 0 && + common_chat_templates_support_enable_thinking(chat_templates_.get()); + + common_chat_params cparams = common_chat_templates_apply(chat_templates_.get(), cinputs); + + if (!partial_formatter_ptr_ && params_.reasoning_format != COMMON_REASONING_FORMAT_NONE) { + common_chat_syntax chat_syntax; + chat_syntax.format = cparams.format; + chat_syntax.reasoning_format = params_.reasoning_format; + chat_syntax.thinking_forced_open = cparams.thinking_forced_open; + chat_syntax.parse_tool_calls = false; + partial_formatter_ptr_ = std::make_unique(chat_syntax); + } + + std::string formatted = cparams.prompt.substr(formatted_cumulative_.size()); + formatted_cumulative_ = cparams.prompt; + + LOG_DBG("formatted: '%s'\n", formatted.c_str()); + return formatted; + } + + partial_formatter * get_partial_formatter() { return partial_formatter_ptr_.get(); } + +private: + std::vector & chat_msgs_; + const common_chat_templates_ptr & chat_templates_; + const common_params & params_; + std::unique_ptr partial_formatter_ptr_; + std::string formatted_cumulative_; +}; + int main(int argc, char ** argv) { common_params params; g_params = ¶ms; @@ -265,15 +361,7 @@ int main(int argc, char ** argv) { std::vector embd_inp; bool waiting_for_first_input = false; - auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) { - common_chat_msg new_msg; - new_msg.role = role; - new_msg.content = content; - auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja); - chat_msgs.push_back(new_msg); - LOG_DBG("formatted: '%s'\n", formatted.c_str()); - return formatted; - }; + chat_formatter chat_add_and_format(chat_msgs, chat_templates, params); std::string prompt; { @@ -709,6 +797,13 @@ int main(int argc, char ** argv) { if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) { assistant_ss << common_token_to_piece(ctx, id, false); + + if (auto * formatter = chat_add_and_format.get_partial_formatter()) { + auto outputs = (*formatter)(assistant_ss.str()); + for (const auto & out : outputs) { + LOG("%s", out.formatted.c_str()); + } + } } // echo this to console @@ -740,8 +835,9 @@ int main(int argc, char ** argv) { for (auto id : embd) { const std::string token_str = common_token_to_piece(ctx, id, params.special); - // Console/Stream Output - LOG("%s", token_str.c_str()); + if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { + LOG("%s", token_str.c_str()); + } // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check From 3d941129a72c9014b213781daffecdb9fdc47979 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 16 Oct 2025 09:13:46 -0300 Subject: [PATCH 02/17] Remove extra call to common_chat_templates_apply --- tools/main/main.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index b31527cbc9a..684dd6dca74 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -170,6 +170,7 @@ class chat_formatter { } partial_formatter * get_partial_formatter() { return partial_formatter_ptr_.get(); } + const std::string & get_full_prompt() const { return formatted_cumulative_; } private: std::vector & chat_msgs_; @@ -379,13 +380,9 @@ int main(int argc, char ** argv) { } if (!params.system_prompt.empty() || !params.prompt.empty()) { - common_chat_templates_inputs inputs; - inputs.use_jinja = g_params->use_jinja; - inputs.messages = chat_msgs; - inputs.add_generation_prompt = !params.prompt.empty(); - - prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt; + prompt = chat_add_and_format.get_full_prompt(); } + } else { // otherwise use the prompt as is prompt = params.prompt; From a7771c1b429f5893e190d2d01f9ef737cfe8063f Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 16 Oct 2025 10:40:59 -0300 Subject: [PATCH 03/17] Suppress template markup in system & prompt display --- tools/main/main.cpp | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 684dd6dca74..ec8bc4b7b34 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -365,6 +365,8 @@ int main(int argc, char ** argv) { chat_formatter chat_add_and_format(chat_msgs, chat_templates, params); std::string prompt; + std::string system_remaining; + std::string prompt_remaining; { if (params.conversation_mode && params.enable_chat_template) { if (!params.system_prompt.empty()) { @@ -400,6 +402,19 @@ int main(int argc, char ** argv) { LOG_DBG("tokens: %s\n", string_from(ctx, embd_inp).c_str()); } + // Set up content tracking to skip template markup during display + bool skip_template_markup = false; + if (params.conversation_mode && params.enable_chat_template) { + for (const auto & msg : chat_msgs) { + if (msg.role == "system") { + system_remaining = msg.content; + } else if (msg.role == "user") { + prompt_remaining = msg.content; + } + } + skip_template_markup = !system_remaining.empty() || !prompt_remaining.empty(); + } + // Should not run without any tokens if (!waiting_for_first_input && embd_inp.empty()) { if (add_bos) { @@ -833,7 +848,29 @@ int main(int argc, char ** argv) { const std::string token_str = common_token_to_piece(ctx, id, params.special); if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { - LOG("%s", token_str.c_str()); + if (skip_template_markup) { + if (!token_str.empty() && !system_remaining.empty() && + system_remaining.compare(0, token_str.length(), token_str) == 0) { + + system_remaining.erase(0, token_str.length()); + LOG("%s", token_str.c_str()); + if (system_remaining.empty()) { + LOG("\n"); + } + + } else if (!token_str.empty() && !prompt_remaining.empty() && + prompt_remaining.compare(0, token_str.length(), token_str) == 0) { + + prompt_remaining.erase(0, token_str.length()); + LOG("%s", token_str.c_str()); + if (prompt_remaining.empty()) { + LOG("\n"); + } + } + + } else { + LOG("%s", token_str.c_str()); + } } // Record Displayed Tokens To Log @@ -853,6 +890,7 @@ int main(int argc, char ** argv) { if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); display = true; + skip_template_markup = false; // system & prompt processing complete } // if not currently processing queued inputs; From 8694fa3f8b1fe8a444df114c0aaec55da0192de8 Mon Sep 17 00:00:00 2001 From: Mason M Date: Fri, 17 Oct 2025 12:01:51 -0300 Subject: [PATCH 04/17] Track system/user prompt position --- tools/main/main.cpp | 82 +++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index ec8bc4b7b34..a539f1f111d 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -83,6 +83,33 @@ static void sigint_handler(int signo) { } #endif +class template_content_tracker { +public: + template_content_tracker() : pos_(0), start_(std::string::npos), end_(std::string::npos) {} + + void set_range(size_t start, size_t end) { + start_ = start; + end_ = end; + } + + bool should_display(size_t pos) const { + return start_ != std::string::npos && pos >= start_ && pos < end_; + } + + bool at_end(size_t pos) const { + return start_ != std::string::npos && pos >= end_; + } + + bool is_active() const { + return start_ != std::string::npos; + } + +private: + size_t pos_; + size_t start_; + size_t end_; +}; + class partial_formatter { public: enum output_type { @@ -365,8 +392,6 @@ int main(int argc, char ** argv) { chat_formatter chat_add_and_format(chat_msgs, chat_templates, params); std::string prompt; - std::string system_remaining; - std::string prompt_remaining; { if (params.conversation_mode && params.enable_chat_template) { if (!params.system_prompt.empty()) { @@ -403,16 +428,27 @@ int main(int argc, char ** argv) { } // Set up content tracking to skip template markup during display - bool skip_template_markup = false; + size_t prompt_pos = 0; + template_content_tracker system_tracker; + template_content_tracker prompt_tracker; + if (params.conversation_mode && params.enable_chat_template) { + size_t search_pos = 0; for (const auto & msg : chat_msgs) { if (msg.role == "system") { - system_remaining = msg.content; + size_t content_start = prompt.find(msg.content, search_pos); + if (content_start != std::string::npos) { + system_tracker.set_range(content_start, content_start + msg.content.length()); + search_pos = content_start + msg.content.length(); + } } else if (msg.role == "user") { - prompt_remaining = msg.content; + size_t content_start = prompt.find(msg.content, search_pos); + if (content_start != std::string::npos) { + prompt_tracker.set_range(content_start, content_start + msg.content.length()); + search_pos = content_start + msg.content.length(); + } } } - skip_template_markup = !system_remaining.empty() || !prompt_remaining.empty(); } // Should not run without any tokens @@ -848,29 +884,26 @@ int main(int argc, char ** argv) { const std::string token_str = common_token_to_piece(ctx, id, params.special); if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { - if (skip_template_markup) { - if (!token_str.empty() && !system_remaining.empty() && - system_remaining.compare(0, token_str.length(), token_str) == 0) { - - system_remaining.erase(0, token_str.length()); - LOG("%s", token_str.c_str()); - if (system_remaining.empty()) { - LOG("\n"); - } - - } else if (!token_str.empty() && !prompt_remaining.empty() && - prompt_remaining.compare(0, token_str.length(), token_str) == 0) { + bool always_display = !system_tracker.is_active() && !prompt_tracker.is_active(); + if (always_display) { + LOG("%s", token_str.c_str()); - prompt_remaining.erase(0, token_str.length()); - LOG("%s", token_str.c_str()); - if (prompt_remaining.empty()) { - LOG("\n"); - } + } else if (system_tracker.should_display(prompt_pos)) { + LOG("%s", token_str.c_str()); + size_t next_pos = prompt_pos + token_str.length(); + if (system_tracker.at_end(next_pos)) { + LOG("\n"); } - } else { + } else if (prompt_tracker.should_display(prompt_pos)) { LOG("%s", token_str.c_str()); + size_t next_pos = prompt_pos + token_str.length(); + if (prompt_tracker.at_end(next_pos)) { + LOG("\n"); + } } + + prompt_pos += token_str.length(); } // Record Displayed Tokens To Log @@ -890,7 +923,6 @@ int main(int argc, char ** argv) { if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); display = true; - skip_template_markup = false; // system & prompt processing complete } // if not currently processing queued inputs; From e403844e1ab23e3227ddd8c3ed5b87d244e7e3aa Mon Sep 17 00:00:00 2001 From: Mason M Date: Sat, 18 Oct 2025 23:07:41 -0300 Subject: [PATCH 05/17] Remove complexity --- tools/main/main.cpp | 83 ++++++--------------------------------------- 1 file changed, 10 insertions(+), 73 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index a539f1f111d..264f7654448 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -83,33 +83,6 @@ static void sigint_handler(int signo) { } #endif -class template_content_tracker { -public: - template_content_tracker() : pos_(0), start_(std::string::npos), end_(std::string::npos) {} - - void set_range(size_t start, size_t end) { - start_ = start; - end_ = end; - } - - bool should_display(size_t pos) const { - return start_ != std::string::npos && pos >= start_ && pos < end_; - } - - bool at_end(size_t pos) const { - return start_ != std::string::npos && pos >= end_; - } - - bool is_active() const { - return start_ != std::string::npos; - } - -private: - size_t pos_; - size_t start_; - size_t end_; -}; - class partial_formatter { public: enum output_type { @@ -175,7 +148,8 @@ class chat_formatter { cinputs.reasoning_format = params_.reasoning_format; cinputs.enable_thinking = - params_.use_jinja && params_.reasoning_budget != 0 && + params_.use_jinja && + params_.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates_.get()); common_chat_params cparams = common_chat_templates_apply(chat_templates_.get(), cinputs); @@ -427,30 +401,6 @@ int main(int argc, char ** argv) { LOG_DBG("tokens: %s\n", string_from(ctx, embd_inp).c_str()); } - // Set up content tracking to skip template markup during display - size_t prompt_pos = 0; - template_content_tracker system_tracker; - template_content_tracker prompt_tracker; - - if (params.conversation_mode && params.enable_chat_template) { - size_t search_pos = 0; - for (const auto & msg : chat_msgs) { - if (msg.role == "system") { - size_t content_start = prompt.find(msg.content, search_pos); - if (content_start != std::string::npos) { - system_tracker.set_range(content_start, content_start + msg.content.length()); - search_pos = content_start + msg.content.length(); - } - } else if (msg.role == "user") { - size_t content_start = prompt.find(msg.content, search_pos); - if (content_start != std::string::npos) { - prompt_tracker.set_range(content_start, content_start + msg.content.length()); - search_pos = content_start + msg.content.length(); - } - } - } - } - // Should not run without any tokens if (!waiting_for_first_input && embd_inp.empty()) { if (add_bos) { @@ -698,6 +648,12 @@ int main(int argc, char ** argv) { embd_inp.push_back(decoder_start_token_id); } + if (chat_add_and_format.get_partial_formatter()) { + for (const auto & msg : chat_msgs) { + LOG("%s\n", msg.content.c_str()); + } + } + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (!embd.empty()) { @@ -883,27 +839,8 @@ int main(int argc, char ** argv) { for (auto id : embd) { const std::string token_str = common_token_to_piece(ctx, id, params.special); - if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { - bool always_display = !system_tracker.is_active() && !prompt_tracker.is_active(); - if (always_display) { - LOG("%s", token_str.c_str()); - - } else if (system_tracker.should_display(prompt_pos)) { - LOG("%s", token_str.c_str()); - size_t next_pos = prompt_pos + token_str.length(); - if (system_tracker.at_end(next_pos)) { - LOG("\n"); - } - - } else if (prompt_tracker.should_display(prompt_pos)) { - LOG("%s", token_str.c_str()); - size_t next_pos = prompt_pos + token_str.length(); - if (prompt_tracker.at_end(next_pos)) { - LOG("\n"); - } - } - - prompt_pos += token_str.length(); + if (!chat_add_and_format.get_partial_formatter()) { + LOG("%s", token_str.c_str()); } // Record Displayed Tokens To Log From c3768f4a45ed6c8f8c5f247282c964b3ad8d63b1 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 23 Oct 2025 12:14:53 -0300 Subject: [PATCH 06/17] Add guards against stripped reasoning --- tools/main/main.cpp | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 264f7654448..733021ceb9b 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -136,14 +136,23 @@ class chat_formatter { params_(params) {} std::string operator()(const std::string & role, const std::string & content) { + if (role == "user") { + formatted_cumulative_.clear(); // Needed if template strips reasoning + } + common_chat_msg new_msg; + if (syntax_) { + new_msg = common_chat_parse(content, false, *syntax_); + } else { + new_msg.content = content; + } new_msg.role = role; - new_msg.content = content; + chat_msgs_.push_back(new_msg); common_chat_templates_inputs cinputs; + cinputs.messages.assign(chat_msgs_.cbegin(), chat_msgs_.cend()); cinputs.use_jinja = params_.use_jinja; - cinputs.messages = chat_msgs_; cinputs.add_generation_prompt = (role == "user"); cinputs.reasoning_format = params_.reasoning_format; @@ -154,16 +163,29 @@ class chat_formatter { common_chat_params cparams = common_chat_templates_apply(chat_templates_.get(), cinputs); - if (!partial_formatter_ptr_ && params_.reasoning_format != COMMON_REASONING_FORMAT_NONE) { - common_chat_syntax chat_syntax; - chat_syntax.format = cparams.format; - chat_syntax.reasoning_format = params_.reasoning_format; - chat_syntax.thinking_forced_open = cparams.thinking_forced_open; - chat_syntax.parse_tool_calls = false; - partial_formatter_ptr_ = std::make_unique(chat_syntax); + if (!syntax_) { + syntax_.reset(new common_chat_syntax); + syntax_->format = cparams.format; + syntax_->reasoning_format = params_.reasoning_format; + syntax_->thinking_forced_open = cparams.thinking_forced_open; + syntax_->parse_tool_calls = false; + } + + bool use_partial_formatter = params_.reasoning_format != COMMON_REASONING_FORMAT_NONE; + if (!partial_formatter_ptr_ && use_partial_formatter) { + partial_formatter_ptr_ = std::make_unique(*syntax_); + } + + std::string formatted; + if (formatted_cumulative_.size() > cparams.prompt.size()) { + LOG_WRN("template cumulative size was reduced from \"%zu\" to \"%zu\" " + "likely due to template's removal of message reasoning.\n", + formatted_cumulative_.size(), cparams.prompt.size()); + + } else { + formatted = cparams.prompt.substr(formatted_cumulative_.size()); } - std::string formatted = cparams.prompt.substr(formatted_cumulative_.size()); formatted_cumulative_ = cparams.prompt; LOG_DBG("formatted: '%s'\n", formatted.c_str()); @@ -177,6 +199,7 @@ class chat_formatter { std::vector & chat_msgs_; const common_chat_templates_ptr & chat_templates_; const common_params & params_; + std::unique_ptr syntax_; std::unique_ptr partial_formatter_ptr_; std::string formatted_cumulative_; }; From c381ea5e066ba1839452810c53367dfab8bd3637 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 23 Oct 2025 12:32:45 -0300 Subject: [PATCH 07/17] Remove trailing _ for member variables --- tools/main/main.cpp | 94 ++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 733021ceb9b..92378262b85 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -95,34 +95,34 @@ class partial_formatter { output_type type; }; - partial_formatter(const common_chat_syntax & syntax) : syntax_(syntax), had_reasoning_(false) {} + partial_formatter(const common_chat_syntax & syntax) : syntax(syntax), had_reasoning(false) {} std::vector operator()(const std::string & accumulated) { - common_chat_msg next = common_chat_parse(accumulated, true, syntax_); + common_chat_msg next = common_chat_parse(accumulated, true, syntax); - auto diffs = common_chat_msg_diff::compute_diffs(previous_, next); + auto diffs = common_chat_msg_diff::compute_diffs(previous, next); std::vector result; for (const auto & diff : diffs) { if (!diff.reasoning_content_delta.empty()) { result.push_back({diff.reasoning_content_delta, REASONING}); - had_reasoning_ = true; + had_reasoning = true; } if (!diff.content_delta.empty()) { - if (had_reasoning_) { + if (had_reasoning) { result.push_back({"\n", REASONING}); - had_reasoning_ = false; + had_reasoning = false; } result.push_back({diff.content_delta, CONTENT}); } } - previous_ = next; + previous = next; return result; } private: - common_chat_syntax syntax_; - common_chat_msg previous_; - bool had_reasoning_; + common_chat_syntax syntax; + common_chat_msg previous; + bool had_reasoning; }; class chat_formatter { @@ -131,77 +131,77 @@ class chat_formatter { std::vector & chat_msgs, const common_chat_templates_ptr & chat_templates, const common_params & params) - : chat_msgs_(chat_msgs), - chat_templates_(chat_templates), - params_(params) {} + : chat_msgs(chat_msgs), + chat_templates(chat_templates), + params(params) {} std::string operator()(const std::string & role, const std::string & content) { if (role == "user") { - formatted_cumulative_.clear(); // Needed if template strips reasoning + formatted_cumulative.clear(); // Needed if template strips reasoning } common_chat_msg new_msg; - if (syntax_) { - new_msg = common_chat_parse(content, false, *syntax_); + if (syntax) { + new_msg = common_chat_parse(content, false, *syntax); } else { new_msg.content = content; } new_msg.role = role; - chat_msgs_.push_back(new_msg); + chat_msgs.push_back(new_msg); common_chat_templates_inputs cinputs; - cinputs.messages.assign(chat_msgs_.cbegin(), chat_msgs_.cend()); - cinputs.use_jinja = params_.use_jinja; + cinputs.messages.assign(chat_msgs.cbegin(), chat_msgs.cend()); + cinputs.use_jinja = params.use_jinja; cinputs.add_generation_prompt = (role == "user"); - cinputs.reasoning_format = params_.reasoning_format; + cinputs.reasoning_format = params.reasoning_format; cinputs.enable_thinking = - params_.use_jinja && - params_.reasoning_budget != 0 && - common_chat_templates_support_enable_thinking(chat_templates_.get()); - - common_chat_params cparams = common_chat_templates_apply(chat_templates_.get(), cinputs); - - if (!syntax_) { - syntax_.reset(new common_chat_syntax); - syntax_->format = cparams.format; - syntax_->reasoning_format = params_.reasoning_format; - syntax_->thinking_forced_open = cparams.thinking_forced_open; - syntax_->parse_tool_calls = false; + params.use_jinja && + params.reasoning_budget != 0 && + common_chat_templates_support_enable_thinking(chat_templates.get()); + + common_chat_params cparams = common_chat_templates_apply(chat_templates.get(), cinputs); + + if (!syntax) { + syntax.reset(new common_chat_syntax); + syntax->format = cparams.format; + syntax->reasoning_format = params.reasoning_format; + syntax->thinking_forced_open = cparams.thinking_forced_open; + syntax->parse_tool_calls = false; } - bool use_partial_formatter = params_.reasoning_format != COMMON_REASONING_FORMAT_NONE; - if (!partial_formatter_ptr_ && use_partial_formatter) { - partial_formatter_ptr_ = std::make_unique(*syntax_); + bool use_partial_formatter = params.reasoning_format != COMMON_REASONING_FORMAT_NONE; + if (!partial_formatter_ptr && use_partial_formatter) { + partial_formatter_ptr = std::make_unique(*syntax); } std::string formatted; - if (formatted_cumulative_.size() > cparams.prompt.size()) { + if (formatted_cumulative.size() > cparams.prompt.size()) { LOG_WRN("template cumulative size was reduced from \"%zu\" to \"%zu\" " "likely due to template's removal of message reasoning.\n", - formatted_cumulative_.size(), cparams.prompt.size()); + formatted_cumulative.size(), cparams.prompt.size()); } else { - formatted = cparams.prompt.substr(formatted_cumulative_.size()); + formatted = cparams.prompt.substr(formatted_cumulative.size()); } - formatted_cumulative_ = cparams.prompt; + formatted_cumulative = cparams.prompt; LOG_DBG("formatted: '%s'\n", formatted.c_str()); return formatted; } - partial_formatter * get_partial_formatter() { return partial_formatter_ptr_.get(); } - const std::string & get_full_prompt() const { return formatted_cumulative_; } + partial_formatter * get_partial_formatter() { return partial_formatter_ptr.get(); } + const std::string & get_full_prompt() const { return formatted_cumulative; } private: - std::vector & chat_msgs_; - const common_chat_templates_ptr & chat_templates_; - const common_params & params_; - std::unique_ptr syntax_; - std::unique_ptr partial_formatter_ptr_; - std::string formatted_cumulative_; + std::vector & chat_msgs; + const common_chat_templates_ptr & chat_templates; + const common_params & params; + std::unique_ptr syntax; + std::unique_ptr partial_formatter_ptr; + std::string formatted_cumulative; }; int main(int argc, char ** argv) { From 3087ff725d8d9260a93f660b2d1ddd231489dd32 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 23 Oct 2025 13:27:48 -0300 Subject: [PATCH 08/17] WIP: colorizing the reasoning content --- common/console.cpp | 4 ++++ common/console.h | 3 ++- tools/main/main.cpp | 9 ++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/common/console.cpp b/common/console.cpp index 078a8d678d9..da2d0101296 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -155,6 +155,10 @@ namespace console { break; case error: fprintf(out, ANSI_BOLD ANSI_COLOR_RED); + break; + case reasoning: + fprintf(out, ANSI_COLOR_BLUE); + break; } current_display = display; fflush(out); diff --git a/common/console.h b/common/console.h index ec175269b9d..99a61ac329f 100644 --- a/common/console.h +++ b/common/console.h @@ -9,7 +9,8 @@ namespace console { reset = 0, prompt, user_input, - error + error, + reasoning }; void init(bool use_simple_io, bool use_advanced_display); diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 92378262b85..c9418562061 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -828,8 +828,15 @@ int main(int argc, char ** argv) { if (auto * formatter = chat_add_and_format.get_partial_formatter()) { auto outputs = (*formatter)(assistant_ss.str()); for (const auto & out : outputs) { - LOG("%s", out.formatted.c_str()); + if (out.type == partial_formatter::REASONING) { + console::set_display(console::reasoning); + } else { + console::set_display(console::reset); + } + fprintf(stdout, "%s", out.formatted.c_str()); + fflush(stdout); } + console::set_display(console::reset); } } From 98b0d26b6dbafe6f2ebc2dc00f21e908dcbdfb66 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 23 Oct 2025 15:34:13 -0300 Subject: [PATCH 09/17] Add new console::write routine --- common/console.cpp | 4 ++++ common/console.h | 18 ++++++++++++++++++ tools/main/main.cpp | 43 +++++++++++++++++++++---------------------- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/common/console.cpp b/common/console.cpp index da2d0101296..71c3b078422 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -165,6 +165,10 @@ namespace console { } } + FILE* get_output_handle() { + return out; + } + static char32_t getchar32() { #if defined(_WIN32) HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); diff --git a/common/console.h b/common/console.h index 99a61ac329f..5afdc76e344 100644 --- a/common/console.h +++ b/common/console.h @@ -3,6 +3,7 @@ #pragma once #include +#include namespace console { enum display_t { @@ -17,4 +18,21 @@ namespace console { void cleanup(); void set_display(display_t display); bool readline(std::string & line, bool multiline_input); + + FILE* get_output_handle(); + + template + void write(const char* format, Args... args) { + FILE* out = get_output_handle(); + fprintf(out, format, args...); + fflush(out); + } + + inline void write(const char* str) { + write("%s", str); + } + + inline void write(const std::string & data) { + write(data.c_str()); + } } diff --git a/tools/main/main.cpp b/tools/main/main.cpp index c9418562061..bb1f858d112 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -44,10 +44,10 @@ static bool need_insert_eot = false; static void print_usage(int argc, char ** argv) { (void) argc; - LOG("\nexample usage:\n"); - LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]); - LOG("\n chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]); - LOG("\n"); + console::write("\nexample usage:\n"); + console::write("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]); + console::write("\n chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]); + console::write("\n"); } static bool file_exists(const std::string & path) { @@ -70,11 +70,11 @@ static void sigint_handler(int signo) { need_insert_eot = true; } else { console::cleanup(); - LOG("\n"); + console::write("\n"); common_perf_print(*g_ctx, *g_smpl); // make sure all logs are flushed - LOG("Interrupted by user\n"); + console::write("Interrupted by user\n"); common_log_pause(common_log_main()); _exit(130); @@ -673,7 +673,7 @@ int main(int argc, char ** argv) { if (chat_add_and_format.get_partial_formatter()) { for (const auto & msg : chat_msgs) { - LOG("%s\n", msg.content.c_str()); + console::write(msg.content + "\n"); } } @@ -833,8 +833,7 @@ int main(int argc, char ** argv) { } else { console::set_display(console::reset); } - fprintf(stdout, "%s", out.formatted.c_str()); - fflush(stdout); + console::write(out.formatted); } console::set_display(console::reset); } @@ -870,7 +869,7 @@ int main(int argc, char ** argv) { const std::string token_str = common_token_to_piece(ctx, id, params.special); if (!chat_add_and_format.get_partial_formatter()) { - LOG("%s", token_str.c_str()); + console::write(token_str); } // Record Displayed Tokens To Log @@ -954,7 +953,7 @@ int main(int argc, char ** argv) { chat_add_and_format("assistant", assistant_ss.str()); } is_interacting = true; - LOG("\n"); + console::write("\n"); } } @@ -968,8 +967,12 @@ int main(int argc, char ** argv) { if ((n_past > 0 || waiting_for_first_input) && is_interacting) { LOG_DBG("waiting for user input\n"); + // color user input only + console::set_display(console::user_input); + display = params.display_prompt; + if (params.conversation_mode) { - LOG("\n> "); + console::write("\n> "); } if (params.input_prefix_bos) { @@ -980,13 +983,9 @@ int main(int argc, char ** argv) { std::string buffer; if (!params.input_prefix.empty() && !params.conversation_mode) { LOG_DBG("appending input prefix: '%s'\n", params.input_prefix.c_str()); - LOG("%s", params.input_prefix.c_str()); + console::write(params.input_prefix); } - // color user input only - console::set_display(console::user_input); - display = params.display_prompt; - std::string line; bool another_line = true; do { @@ -999,7 +998,7 @@ int main(int argc, char ** argv) { display = true; if (buffer.empty()) { // Ctrl+D on empty line exits - LOG("EOF by user\n"); + console::write("EOF by user\n"); break; } @@ -1017,7 +1016,7 @@ int main(int argc, char ** argv) { // append input suffix if any if (!params.input_suffix.empty() && !params.conversation_mode) { LOG_DBG("appending input suffix: '%s'\n", params.input_suffix.c_str()); - LOG("%s", params.input_suffix.c_str()); + console::write(params.input_suffix); } LOG_DBG("buffer: '%s'\n", buffer.c_str()); @@ -1091,7 +1090,7 @@ int main(int argc, char ** argv) { // end of generation if (!embd.empty() && llama_vocab_is_eog(vocab, embd.back()) && !(params.interactive)) { - LOG(" [end of text]\n"); + console::write(" [end of text]\n"); break; } @@ -1104,11 +1103,11 @@ int main(int argc, char ** argv) { } if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) { - LOG("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); + LOG_INF("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); } - LOG("\n\n"); + console::write("\n\n"); common_perf_print(ctx, smpl); common_sampler_free(smpl); From becf4c5e30aab7d2e6cf5aa164aceff5cf270241 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 23 Oct 2025 16:15:36 -0300 Subject: [PATCH 10/17] Rename syntax variable --- tools/main/main.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index bb1f858d112..2ad256b4ea6 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -141,8 +141,8 @@ class chat_formatter { } common_chat_msg new_msg; - if (syntax) { - new_msg = common_chat_parse(content, false, *syntax); + if (syntax_ptr) { + new_msg = common_chat_parse(content, false, *syntax_ptr); } else { new_msg.content = content; } @@ -163,17 +163,17 @@ class chat_formatter { common_chat_params cparams = common_chat_templates_apply(chat_templates.get(), cinputs); - if (!syntax) { - syntax.reset(new common_chat_syntax); - syntax->format = cparams.format; - syntax->reasoning_format = params.reasoning_format; - syntax->thinking_forced_open = cparams.thinking_forced_open; - syntax->parse_tool_calls = false; + if (!syntax_ptr) { + syntax_ptr.reset(new common_chat_syntax); + syntax_ptr->format = cparams.format; + syntax_ptr->reasoning_format = params.reasoning_format; + syntax_ptr->thinking_forced_open = cparams.thinking_forced_open; + syntax_ptr->parse_tool_calls = false; } bool use_partial_formatter = params.reasoning_format != COMMON_REASONING_FORMAT_NONE; if (!partial_formatter_ptr && use_partial_formatter) { - partial_formatter_ptr = std::make_unique(*syntax); + partial_formatter_ptr = std::make_unique(*syntax_ptr); } std::string formatted; @@ -199,7 +199,7 @@ class chat_formatter { std::vector & chat_msgs; const common_chat_templates_ptr & chat_templates; const common_params & params; - std::unique_ptr syntax; + std::unique_ptr syntax_ptr; std::unique_ptr partial_formatter_ptr; std::string formatted_cumulative; }; From c87931791f641fa37dcbcd2bc820d272676e252b Mon Sep 17 00:00:00 2001 From: Mason M Date: Fri, 24 Oct 2025 10:03:29 -0300 Subject: [PATCH 11/17] Use non-template version of write routine --- common/console.cpp | 9 +++++++-- common/console.h | 16 ++-------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/common/console.cpp b/common/console.cpp index 71c3b078422..2dca5f85c83 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -1,6 +1,7 @@ #include "console.h" #include #include +#include #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN @@ -165,8 +166,12 @@ namespace console { } } - FILE* get_output_handle() { - return out; + void write(const char* format, ...) { + va_list args; + va_start(args, format); + vfprintf(out, format, args); + va_end(args); + fflush(out); } static char32_t getchar32() { diff --git a/common/console.h b/common/console.h index 5afdc76e344..4b9652ce26a 100644 --- a/common/console.h +++ b/common/console.h @@ -3,7 +3,6 @@ #pragma once #include -#include namespace console { enum display_t { @@ -19,20 +18,9 @@ namespace console { void set_display(display_t display); bool readline(std::string & line, bool multiline_input); - FILE* get_output_handle(); - - template - void write(const char* format, Args... args) { - FILE* out = get_output_handle(); - fprintf(out, format, args...); - fflush(out); - } - - inline void write(const char* str) { - write("%s", str); - } + void write(const char* format, ...); inline void write(const std::string & data) { - write(data.c_str()); + write("%s", data.c_str()); } } From edb8c0f4a708223f6dd10c527337d9da1ee27281 Mon Sep 17 00:00:00 2001 From: Mason M Date: Sun, 26 Oct 2025 21:11:20 -0300 Subject: [PATCH 12/17] Write to log when enabled otherwise direct --- common/console.cpp | 84 ++++++++++++++++++++++++++++++---------------- common/console.h | 21 +++++++++++- common/log.cpp | 25 ++++++++++++++ common/log.h | 8 +++-- 4 files changed, 106 insertions(+), 32 deletions(-) diff --git a/common/console.cpp b/common/console.cpp index 2dca5f85c83..a8e9aa4340d 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -25,14 +25,14 @@ #include #endif -#define ANSI_COLOR_RED "\x1b[31m" -#define ANSI_COLOR_GREEN "\x1b[32m" -#define ANSI_COLOR_YELLOW "\x1b[33m" -#define ANSI_COLOR_BLUE "\x1b[34m" -#define ANSI_COLOR_MAGENTA "\x1b[35m" -#define ANSI_COLOR_CYAN "\x1b[36m" -#define ANSI_COLOR_RESET "\x1b[0m" -#define ANSI_BOLD "\x1b[1m" +#define ANSI_COLOR_RED LOG_COL_RED +#define ANSI_COLOR_GREEN LOG_COL_GREEN +#define ANSI_COLOR_YELLOW LOG_COL_YELLOW +#define ANSI_COLOR_BLUE LOG_COL_BLUE +#define ANSI_COLOR_MAGENTA LOG_COL_MAGENTA +#define ANSI_COLOR_CYAN LOG_COL_CYAN +#define ANSI_COLOR_RESET LOG_COL_DEFAULT +#define ANSI_BOLD LOG_COL_BOLD namespace console { @@ -143,30 +143,58 @@ namespace console { // Keep track of current display and only emit ANSI code if it changes void set_display(display_t display) { if (advanced_display && current_display != display) { - fflush(stdout); - switch(display) { - case reset: - fprintf(out, ANSI_COLOR_RESET); - break; - case prompt: - fprintf(out, ANSI_COLOR_YELLOW); - break; - case user_input: - fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); - break; - case error: - fprintf(out, ANSI_BOLD ANSI_COLOR_RED); - break; - case reasoning: - fprintf(out, ANSI_COLOR_BLUE); - break; - } current_display = display; - fflush(out); + + if (display == user_input && common_log_is_active(common_log_main())) { + common_log_flush(common_log_main()); + } + + if (display == user_input || !common_log_is_active(common_log_main())) { + fflush(stdout); + switch(display) { + case reset: + fprintf(out, ANSI_COLOR_RESET); + break; + case prompt: + fprintf(out, ANSI_COLOR_YELLOW); + break; + case user_input: + fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case error: + fprintf(out, ANSI_BOLD ANSI_COLOR_RED); + break; + case reasoning: + fprintf(out, ANSI_COLOR_BLUE); + break; + } + fflush(out); + } + } + } + + display_t get_display() { + return current_display; + } + + const char * get_display_color() { + switch(current_display) { + case reset: + return ANSI_COLOR_RESET; + case prompt: + return ANSI_COLOR_YELLOW; + case user_input: + return ANSI_BOLD ANSI_COLOR_GREEN; + case error: + return ANSI_BOLD ANSI_COLOR_RED; + case reasoning: + return ANSI_COLOR_BLUE; + default: + return ""; } } - void write(const char* format, ...) { + void write_console(const char* format, ...) { va_list args; va_start(args, format); vfprintf(out, format, args); diff --git a/common/console.h b/common/console.h index 4b9652ce26a..d25e8d83e28 100644 --- a/common/console.h +++ b/common/console.h @@ -3,6 +3,7 @@ #pragma once #include +#include "log.h" namespace console { enum display_t { @@ -16,9 +17,27 @@ namespace console { void init(bool use_simple_io, bool use_advanced_display); void cleanup(); void set_display(display_t display); + display_t get_display(); + const char * get_display_color(); bool readline(std::string & line, bool multiline_input); - void write(const char* format, ...); + void write_console(const char* format, ...); + + template + void write(const char* format, Args... args) { + if (get_display() == user_input || !common_log_is_active(common_log_main())) { + write_console(format, args...); + + } else { + const char * color = get_display_color(); + std::string colored_format = std::string(color) + format + LOG_COL_DEFAULT; + common_log_add(common_log_main(), GGML_LOG_LEVEL_CONT, colored_format.c_str(), args...); + } + } + + inline void write(const char* data) { + write("%s", data); + } inline void write(const std::string & data) { write("%s", data.c_str()); diff --git a/common/log.cpp b/common/log.cpp index 4ccdbd17cd7..a25ddd7fd0e 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -174,6 +174,7 @@ struct common_log { std::mutex mtx; std::thread thrd; std::condition_variable cv; + std::condition_variable cv_flushed; FILE * file; @@ -288,6 +289,10 @@ struct common_log { cur = entries[head]; head = (head + 1) % entries.size(); + + if (head == tail) { + cv_flushed.notify_all(); + } } if (cur.is_end) { @@ -376,6 +381,18 @@ struct common_log { this->timestamps = timestamps; } + + bool is_active() const { + return running; + } + + void flush() { + if (!running) { + return; + } + std::unique_lock lock(mtx); + cv_flushed.wait(lock, [this]() { return head == tail; }); + } }; // @@ -409,6 +426,14 @@ void common_log_free(struct common_log * log) { delete log; } +bool common_log_is_active(struct common_log * log) { + return log->is_active(); +} + +void common_log_flush(struct common_log * log) { + log->flush(); +} + void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...) { va_list args; va_start(args, fmt); diff --git a/common/log.h b/common/log.h index f329b434c93..dab39baa085 100644 --- a/common/log.h +++ b/common/log.h @@ -42,9 +42,11 @@ struct common_log; struct common_log * common_log_init(); struct common_log * common_log_main(); // singleton, automatically destroys itself on exit -void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe -void common_log_resume(struct common_log * log); // resume the worker thread, not thread-safe -void common_log_free (struct common_log * log); +void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe +void common_log_resume (struct common_log * log); // resume the worker thread, not thread-safe +void common_log_free (struct common_log * log); +bool common_log_is_active(struct common_log * log); // check if logging is active +void common_log_flush (struct common_log * log); // wait for all pending messages to be processed LOG_ATTRIBUTE_FORMAT(3, 4) void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...); From 72164486a21c5b35b5339638af2b1fece65dfc25 Mon Sep 17 00:00:00 2001 From: Mason M Date: Mon, 27 Oct 2025 09:06:37 -0300 Subject: [PATCH 13/17] Fix pointer formatting --- common/console.cpp | 2 +- common/console.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/console.cpp b/common/console.cpp index a8e9aa4340d..f57d17f1ce3 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -194,7 +194,7 @@ namespace console { } } - void write_console(const char* format, ...) { + void write_console(const char * format, ...) { va_list args; va_start(args, format); vfprintf(out, format, args); diff --git a/common/console.h b/common/console.h index d25e8d83e28..ce35bbe2efa 100644 --- a/common/console.h +++ b/common/console.h @@ -21,10 +21,10 @@ namespace console { const char * get_display_color(); bool readline(std::string & line, bool multiline_input); - void write_console(const char* format, ...); + void write_console(const char * format, ...); template - void write(const char* format, Args... args) { + void write(const char * format, Args... args) { if (get_display() == user_input || !common_log_is_active(common_log_main())) { write_console(format, args...); @@ -35,7 +35,7 @@ namespace console { } } - inline void write(const char* data) { + inline void write(const char * data) { write("%s", data); } From c0ca21d9fafd88fd6cefd669af81b43b6fbc5c3f Mon Sep 17 00:00:00 2001 From: Mason M Date: Mon, 27 Oct 2025 10:25:38 -0300 Subject: [PATCH 14/17] Only call common_chat_parse with assistant messages --- tools/main/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 2ad256b4ea6..ef8b32fbdea 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -141,7 +141,7 @@ class chat_formatter { } common_chat_msg new_msg; - if (syntax_ptr) { + if (role == "assistant" && syntax_ptr) { new_msg = common_chat_parse(content, false, *syntax_ptr); } else { new_msg.content = content; From 1b1629d2b2f02b52adc60366ddb4f6ab93ff2880 Mon Sep 17 00:00:00 2001 From: Mason M Date: Tue, 28 Oct 2025 10:17:28 -0300 Subject: [PATCH 15/17] Add reasoning delimiters --- tools/main/main.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index ef8b32fbdea..34c154b23cc 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -104,12 +104,15 @@ class partial_formatter { std::vector result; for (const auto & diff : diffs) { if (!diff.reasoning_content_delta.empty()) { + if (!had_reasoning) { + result.push_back({"Thinking... ", REASONING}); + } result.push_back({diff.reasoning_content_delta, REASONING}); had_reasoning = true; } if (!diff.content_delta.empty()) { if (had_reasoning) { - result.push_back({"\n", REASONING}); + result.push_back({" ...\n", REASONING}); had_reasoning = false; } result.push_back({diff.content_delta, CONTENT}); From fc248d102b06b31e597e8c248a876fc6363e9e7d Mon Sep 17 00:00:00 2001 From: Mason M Date: Tue, 28 Oct 2025 10:20:25 -0300 Subject: [PATCH 16/17] Remove stale data from delta --- tools/main/main.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 34c154b23cc..61025fadd87 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -112,7 +112,7 @@ class partial_formatter { } if (!diff.content_delta.empty()) { if (had_reasoning) { - result.push_back({" ...\n", REASONING}); + result.push_back({" ...\n\n", REASONING}); had_reasoning = false; } result.push_back({diff.content_delta, CONTENT}); @@ -122,6 +122,11 @@ class partial_formatter { return result; } + void clear() { + previous = common_chat_msg(); + had_reasoning = false; + } + private: common_chat_syntax syntax; common_chat_msg previous; @@ -141,6 +146,10 @@ class chat_formatter { std::string operator()(const std::string & role, const std::string & content) { if (role == "user") { formatted_cumulative.clear(); // Needed if template strips reasoning + + if (partial_formatter_ptr) { + partial_formatter_ptr->clear(); // Remove stale data from delta + } } common_chat_msg new_msg; From e42715e035af574e1f3e8080f2ce3dd60c858f66 Mon Sep 17 00:00:00 2001 From: Mason M Date: Sat, 8 Nov 2025 10:55:31 -0400 Subject: [PATCH 17/17] Use double-arrow as reasoning delimiter --- tools/main/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 61025fadd87..9955adccc0a 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -105,14 +105,14 @@ class partial_formatter { for (const auto & diff : diffs) { if (!diff.reasoning_content_delta.empty()) { if (!had_reasoning) { - result.push_back({"Thinking... ", REASONING}); + result.push_back({"\n⇒ ", REASONING}); } result.push_back({diff.reasoning_content_delta, REASONING}); had_reasoning = true; } if (!diff.content_delta.empty()) { if (had_reasoning) { - result.push_back({" ...\n\n", REASONING}); + result.push_back({"\n\n", REASONING}); had_reasoning = false; } result.push_back({diff.content_delta, CONTENT});