@@ -3800,6 +3800,71 @@ def __call__(self, **kwargs):
38003800 return super ().__call__ (** kwargs )
38013801
38023802
class LFM2VLChatHandler(Llava15ChatHandler):
    """Chat handler for LiquidAI LFM2-VL vision-language models.

    Renders messages in a ChatML-style layout (``<|im_start|>role`` ...
    ``<|im_end|>``), wrapping every image URL in ``<|image_start|>`` /
    ``<|image_end|>`` markers, and clears the llama state before each call.
    """

    # Special tokens used by the LFM2-VL prompt format.
    LFM2VL_BOS_TOKEN = "<|startoftext|>"
    LFM2VL_EOS_TOKEN = "<|im_end|>"
    LFM2VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM2VL_IMAGE_END_TOKEN = "<|image_end|>"

    # Jinja template. A message's content is either a plain string or a list
    # of parts; an image part may carry its URL directly as a string or as an
    # object with a ``url`` attribute. Role headers and turn terminators are
    # followed by a bare newline (no trailing space) so the rendered prompt
    # matches the ChatML-style layout the model expects.
    CHAT_FORMAT = (
        "{%- for message in messages -%}"
        "{{ '<|im_start|>' + message['role'] + '\n' }}"
        "{%- if message['content'] is string -%}"
        "{{ message['content'] }}"
        "{%- else -%}"
        "{%- for content in message['content'] -%}"
        "{%- if 'image_url' in content -%}"
        "{%- if content.image_url is string -%}"
        "<|image_start|>{{ content.image_url }}<|image_end|>"
        "{%- else -%}"
        "<|image_start|>{{ content.image_url.url }}<|image_end|>"
        "{%- endif -%}"
        "{%- elif content['type'] == 'text' -%}"
        "{{ content['text'] }}"
        "{%- endif -%}"
        "{%- endfor -%}"
        "{%- endif -%}"
        "{{ '<|im_end|>\n' }}"
        "{%- endfor -%}"
        "{%- if add_generation_prompt -%}"
        "{{ '<|im_start|>assistant\n' }}"
        "{%- endif -%}"
    )

    def __init__(self, image_min_tokens: int = -1, image_max_tokens: int = -1, **kwargs):
        """Create an LFM2-VL handler.

        LiquidAI officially recommends configuring LFM2-VL with the following
        vision parameters: ``image_min_tokens=64``, ``image_max_tokens=256``.

        Args:
            image_min_tokens: Lower bound on image tokens, forwarded to the
                parent handler. The default of -1 presumably means "leave
                unset" -- confirm against the parent implementation.
            image_max_tokens: Upper bound on image tokens, forwarded to the
                parent handler (same caveat for the -1 default).
            **kwargs: Remaining keyword arguments passed through unchanged to
                the parent ``__init__``.
        """
        self.image_min_tokens = image_min_tokens
        self.image_max_tokens = image_max_tokens
        super().__init__(
            image_min_tokens=self.image_min_tokens,
            image_max_tokens=self.image_max_tokens,
            **kwargs,
        )

    def __call__(self, **kwargs):
        """Clear llama and handler state, then delegate to the parent handler.

        Args:
            **kwargs: Must contain ``llama`` (the model wrapper to reset);
                ``messages`` is read only for verbose logging.

        Returns:
            Whatever the parent handler's ``__call__`` returns.

        Raises:
            KeyError: If ``llama`` is not supplied.
        """
        llama = kwargs['llama']

        # Start from a clean slate: reset the model, clear its context memory
        # and rewind the token counter.
        llama.reset()
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, 'input_ids'):
            llama.input_ids.fill(0)

        # Drop any cached image embedding left on this handler.
        if hasattr(self, '_last_image_embed'):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            try:
                image_count = len(self.get_image_urls(kwargs.get('messages', [])))
            except Exception:
                # Counting images is diagnostics only; never let a failure
                # here abort the actual request.
                print("LFM2VLChatHandler - Cleared state", file=sys.stderr)
            else:
                print(
                    f"LFM2VLChatHandler - Cleared state, Processing {image_count} images",
                    file=sys.stderr,
                )

        return super().__call__(**kwargs)
3866+
3867+
38033868class Qwen25VLChatHandler (Llava15ChatHandler ):
38043869 DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
38053870
0 commit comments