Commit d5131e2

feat: implement GLM46VChatHandler for GLM-4.6V Series Model
Signed-off-by: JamePeng <jame_peng@sina.com>
1 parent 70d5844 commit d5131e2

2 files changed: +101 -0 lines changed

README.md

Lines changed: 1 addition & 0 deletions
@@ -497,6 +497,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
 | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6`, `minicpm-v-4.0` |
 | [gemma3](https://huggingface.co/unsloth/gemma-3-27b-it-GGUF) | `Gemma3ChatHandler` | `gemma3` |
 | [glm4.1v](https://huggingface.co/unsloth/GLM-4.1V-9B-Thinking-GGUF) | `GLM41VChatHandler` | `glm4.1v` |
+| [glm4.6v](https://huggingface.co/unsloth/GLM-4.6V-Flash-GGUF) | `GLM46VChatHandler` | `glm4.6v` |
 | [lfm2-vl](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | `LFM2VLChatHandler` | `lfm2-vl` |
 | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
 | [qwen3-vl](https://huggingface.co/unsloth/Qwen3-VL-8B-Thinking-GGUF) | `Qwen3VLChatHandler` | `qwen3-vl` |
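
As with the other rows in this table, the new `glm4.6v` entry is used by constructing the handler and passing it to `Llama`. A minimal sketch following the usual llama-cpp-python multimodal pattern; the model and mmproj paths below are placeholders, not files shipped with this commit:

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import GLM46VChatHandler

# Placeholder paths: any GLM-4.6V GGUF checkpoint plus its matching
# multimodal projector (mmproj) file.
chat_handler = GLM46VChatHandler(
    clip_model_path="mmproj-GLM-4.6V-Flash.gguf",
    enable_thinking=True,  # set False to suppress <think>...</think> output
)

llm = Llama(
    model_path="GLM-4.6V-Flash.gguf",
    chat_handler=chat_handler,
    n_ctx=8192,  # leave headroom for image embeddings
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])
```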

llama_cpp/llama_chat_format.py

Lines changed: 100 additions & 0 deletions
@@ -3801,6 +3801,106 @@ def __call__(self, **kwargs):
         return super().__call__(**kwargs)


+class GLM46VChatHandler(Llava15ChatHandler):
+    GLM46V_EOS_TOKEN = "<|endoftext|>"
+    GLM46V_PAD_TOKEN = "<|endoftext|>"
+    GLM46V_IMAGE_START_TOKEN = "<|begin_of_image|>"
+    GLM46V_IMAGE_END_TOKEN = "<|end_of_image|>"
+
+    CHAT_FORMAT = (
+        "[gMASK]<sop>"
+        "{%- if tools -%}"
+        "<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n"
+        "You are provided with function signatures within <tools></tools> XML tags:\n<tools>\n"
+        "{%- for tool in tools -%}"
+        "{{ tool | tojson(ensure_ascii=False) }}\n"
+        "{%- endfor -%}"
+        "</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n"
+        "<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n...\n</tool_call>"
+        "{%- endif -%}"
+
+        "{%- for m in messages -%}"
+        "{%- if m.role == 'system' -%}"
+        "<|system|>\n{{ m.content }}"
+        "{%- elif m.role == 'user' -%}"
+        "<|user|>\n"
+        "{%- if m.content is string -%}"
+        "{{ m.content }}"
+        "{%- else -%}"
+        "{%- for item in m.content -%}"
+        "{%- if item.type == 'image_url' or 'image_url' in item -%}"
+        "<|begin_of_image|>"
+        "{%- if item.image_url is string -%}"
+        "{{- item.image_url -}}"
+        "{%- else -%}"
+        "{{- item.image_url.url -}}"
+        "{%- endif -%}"
+        "<|end_of_image|>"
+        "{%- elif item.type == 'text' -%}"
+        "{{ item.text }}"
+        "{%- endif -%}"
+        "{%- endfor -%}"
+        "{%- endif -%}"
+        # If enable_thinking is disabled, insert `/nothink` according to the source code logic.
+        "{{ '/nothink' if not enable_thinking else '' }}"
+        "{%- elif m.role == 'assistant' -%}"
+        "<|assistant|>"
+        "{%- if enable_thinking -%}"
+        "{%- set reasoning = m.reasoning_content if m.reasoning_content is string else '' -%}"
+        "\n<think>{{ reasoning.strip() }}</think>"
+        "{%- else -%}"
+        "\n<think></think>"
+        "{%- endif -%}"
+        "{{ '\n' + m.content.strip() if m.content.strip() else '' }}"
+        "{%- endif -%}"
+        "{{ GLM46V_EOS_TOKEN }}"
+        "{%- endfor -%}"
+
+        "{%- if add_generation_prompt -%}"
+        "<|assistant|>\n"
+        "{{ '<think>' if enable_thinking else '<think></think>\n' }}"
+        "{%- endif -%}"
+    )
+
+    def __init__(self, enable_thinking: bool = True, **kwargs):
+        """
+        GLM-4.6V Handler
+        Parameters:
+        - enable_thinking (bool): Whether to enable the model's thinking process. Defaults to True.
+        """
+        self.enable_thinking = enable_thinking
+        super().__init__(**kwargs)
+
+    def __call__(self, **kwargs):
+        self.extra_template_arguments["enable_thinking"] = self.enable_thinking
+        self.extra_template_arguments["GLM46V_EOS_TOKEN"] = self.GLM46V_EOS_TOKEN
+
+        # https://huggingface.co/zai-org/GLM-4.6V-Flash/blob/main/generation_config.json
+        kwargs['stop'] = [self.GLM46V_EOS_TOKEN, "<|user|>", "<|observation|>", "<|code_middle|>"]  # Stop token patch
+
+        llama = kwargs['llama']
+        llama.reset()
+        llama._ctx.memory_clear(True)
+        llama.n_tokens = 0
+
+        if hasattr(llama, 'input_ids'):
+            llama.input_ids.fill(0)
+
+        if hasattr(self, '_last_image_embed'):
+            self._last_image_embed = None
+            self._last_image_hash = None
+
+        if self.verbose:
+            messages = kwargs.get('messages', [])
+            try:
+                image_count = len(self.get_image_urls(messages))
+                print(f"GLM46VChatHandler(enable_thinking={self.enable_thinking}) - Processing {image_count} images", file=sys.stderr)
+            except Exception:
+                print(f"GLM46VChatHandler(enable_thinking={self.enable_thinking}) - Cleared state", file=sys.stderr)
+
+        return super().__call__(**kwargs)
+
+
 class LFM2VLChatHandler(Llava15ChatHandler):
     LFM2VL_BOS_TOKEN = "<|startoftext|>"
     LFM2VL_EOS_TOKEN = "<|im_end|>"
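
For a quick sanity check, this is approximately what `CHAT_FORMAT` renders for a single user turn containing one image and one text item, with `enable_thinking=True` and `add_generation_prompt=True`. Here `<image-url>` is a placeholder for the image reference, which the inherited `Llava15ChatHandler` machinery replaces with image embeddings at evaluation time:

```
[gMASK]<sop><|user|>
<|begin_of_image|><image-url><|end_of_image|>Describe this image.<|endoftext|><|assistant|>
<think>
```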
