@@ -127,13 +127,15 @@ class clip_flash_attn_type (enum.IntEnum):
127127# enum clip_flash_attn_type flash_attn_type;
128128# int image_min_tokens;
129129# int image_max_tokens;
130+ # bool warmup;
130131# };
class clip_context_params(Structure):
    """ctypes mirror of the C ``struct clip_context_params``.

    ctypes lays members out in the order they are listed in ``_fields_``,
    so the entries below must stay in the same order, and use equivalent
    C types, as the struct declaration reproduced in the comment that
    precedes this class.
    """

    _fields_ = [
        ("use_gpu", c_bool),
        # Declared in C as `enum clip_flash_attn_type`; bound here as a
        # plain C int.
        ("flash_attn_type", c_int),
        ("image_min_tokens", c_int),
        ("image_max_tokens", c_int),
        # Trailing member, matching the `bool warmup;` line that closes
        # the C struct declaration.
        ("warmup", c_bool),
    ]
138140
139141# struct mtmd_context_params {
@@ -143,6 +145,7 @@ class clip_context_params(Structure):
143145# const char * image_marker; // deprecated, use media_marker instead
144146# const char * media_marker;
145147# enum llama_flash_attn_type flash_attn_type;
148+ # bool warmup; // whether to run a warmup encode pass after initialization
146149
147150# // limit number of image tokens, only for vision models with dynamic resolution
148151# int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
@@ -156,6 +159,7 @@ class mtmd_context_params(Structure):
156159 ("image_marker" , c_char_p ),
157160 ("media_marker" , c_char_p ),
158161 ("flash_attn_type" , c_int ),
162+ ("warmup" , c_bool ),
159163 ("image_min_tokens" , c_int ),
160164 ("image_max_tokens" , c_int ),
161165 ]
0 commit comments