Skip to content

Commit 060f06d

Browse files
committed
feat: implement LFM2VLChatHandler for LFM2-VL series models
1 parent 9b9710a commit 060f06d

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
497497
| [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6`, `minicpm-v-4.0` |
498498
| [gemma3](https://huggingface.co/unsloth/gemma-3-27b-it-GGUF) | `Gemma3ChatHandler` | `gemma3` |
499499
| [glm4.1v](https://huggingface.co/unsloth/GLM-4.1V-9B-Thinking-GGUF) | `GLM41VChatHandler` | `glm4.1v` |
500+
| [lfm2-vl](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | `LFM2VLChatHandler` | `lfm2-vl` |
500501
| [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
501502
| [qwen3-vl](https://huggingface.co/unsloth/Qwen3-VL-8B-Thinking-GGUF) | `Qwen3VLChatHandler` | `qwen3-vl` |
502503

llama_cpp/llama_chat_format.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3800,6 +3800,71 @@ def __call__(self, **kwargs):
38003800
return super().__call__(**kwargs)
38013801

38023802

3803+
class LFM2VLChatHandler(Llava15ChatHandler):
    """Chat handler for LiquidAI's LFM2-VL multimodal model series.

    Formats conversations with a ChatML-style Jinja template
    (``<|im_start|>``/``<|im_end|>`` role delimiters) and wraps each image
    URL in ``<|image_start|>``/``<|image_end|>`` markers. On every call the
    handler fully resets the llama context and any cached image embedding so
    state from a previous request cannot leak into the current one.
    """

    # Special tokens used by the LFM2-VL tokenizer.
    LFM2VL_BOS_TOKEN = "<|startoftext|>"
    LFM2VL_EOS_TOKEN = "<|im_end|>"
    LFM2VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM2VL_IMAGE_END_TOKEN = "<|image_end|>"

    CHAT_FORMAT = (
        "{%- for message in messages -%}"
        "{{ '<|im_start|>' + message['role'] + '\n' }}"
        "{%- if message['content'] is string -%}"
        "{{ message['content'] }}"
        "{%- else -%}"
        "{%- for content in message['content'] -%}"
        "{%- if 'image_url' in content -%}"
        "{%- if content.image_url is string -%}"
        "<|image_start|>{{ content.image_url }}<|image_end|>"
        "{%- else -%}"
        "<|image_start|>{{ content.image_url.url }}<|image_end|>"
        "{%- endif -%}"
        "{%- elif content['type'] == 'text' -%}"
        "{{ content['text'] }}"
        "{%- endif -%}"
        "{%- endfor -%}"
        "{%- endif -%}"
        "{{ '<|im_end|>\n' }}"
        "{%- endfor -%}"
        "{%- if add_generation_prompt -%}"
        "{{ '<|im_start|>assistant\n' }}"
        "{%- endif -%}"
    )

    def __init__(self, image_min_tokens: int = -1, image_max_tokens: int = -1, **kwargs):
        """LFM2-VL handler.

        LiquidAI officially recommends configuring LFM2-VL with the following
        vision parameters: min_image_tokens=64, max_image_tokens=256.

        Args:
            image_min_tokens: Minimum number of vision tokens per image
                (-1 keeps the base handler's default).
            image_max_tokens: Maximum number of vision tokens per image
                (-1 keeps the base handler's default).
            **kwargs: Forwarded unchanged to ``Llava15ChatHandler``.
        """
        self.image_min_tokens = image_min_tokens
        self.image_max_tokens = image_max_tokens
        super().__init__(
            image_min_tokens=self.image_min_tokens,
            image_max_tokens=self.image_max_tokens,
            **kwargs,
        )

    def __call__(self, **kwargs):
        # Reset all per-request llama state before formatting/evaluating, so
        # KV-cache entries and token counts from an earlier call cannot
        # contaminate this one.
        llama = kwargs['llama']
        llama.reset()
        # NOTE(review): `_ctx` is a private attribute of Llama — confirm
        # `memory_clear` remains available across llama-cpp-python versions.
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, 'input_ids'):
            llama.input_ids.fill(0)

        # Drop the cached image embedding so images are re-encoded rather
        # than reused from the previous request.
        if hasattr(self, '_last_image_embed'):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            messages = kwargs.get('messages', [])
            try:
                image_count = len(self.get_image_urls(messages))
                print(f"LFM2VLChatHandler - Cleared state, Processing {image_count} images", file=sys.stderr)
            except Exception:
                # Fixed: was an f-string with no placeholders (ruff F541);
                # the emitted text is byte-identical.
                print("LFM2VLChatHandler - Cleared state", file=sys.stderr)

        return super().__call__(**kwargs)
3866+
3867+
38033868
class Qwen25VLChatHandler(Llava15ChatHandler):
38043869
DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
38053870

0 commit comments

Comments
 (0)