Skip to content

Commit 6ee17c0

Browse files
committed
Sync mtmd: add mtmd_context_params::warmup option
1 parent 4a5baa6 commit 6ee17c0

File tree

2 files changed

+5
-0
lines changed

2 files changed

+5
-0
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2828,6 +2828,7 @@ def _init_mtmd_context(self, llama_model: llama.Llama):
2828 2828
mctx_params.print_timings = self.verbose
2829 2829
mctx_params.n_threads = llama_model.n_threads
2830 2830
mctx_params.flash_attn_type = self._mtmd_cpp.clip_flash_attn_type.CLIP_FLASH_ATTN_TYPE_AUTO
2831+
mctx_params.warmup = True
2831 2832
if self.image_min_tokens > 0:
2832 2833
mctx_params.image_min_tokens = self.image_min_tokens
2833 2834
if self.image_max_tokens > 0:

llama_cpp/mtmd_cpp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,15 @@ class clip_flash_attn_type (enum.IntEnum):
127 127
# enum clip_flash_attn_type flash_attn_type;
128 128
# int image_min_tokens;
129 129
# int image_max_tokens;
130+
# bool warmup;
130 131
# };
131 132
class clip_context_params(Structure):
132 133
_fields_ = [
133 134
("use_gpu", c_bool),
134 135
("flash_attn_type", c_int),
135 136
("image_min_tokens", c_int),
136 137
("image_max_tokens", c_int),
138+
("warmup", c_bool),
137 139
]
138 140

139 141
# struct mtmd_context_params {
@@ -143,6 +145,7 @@ class clip_context_params(Structure):
143 145
# const char * image_marker; // deprecated, use media_marker instead
144 146
# const char * media_marker;
145 147
# enum llama_flash_attn_type flash_attn_type;
148+
# bool warmup; // whether to run a warmup encode pass after initialization
146 149

147 150
# // limit number of image tokens, only for vision models with dynamic resolution
148 151
# int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
@@ -156,6 +159,7 @@ class mtmd_context_params(Structure):
156 159
("image_marker", c_char_p),
157 160
("media_marker", c_char_p),
158 161
("flash_attn_type", c_int),
162+
("warmup", c_bool),
159 163
("image_min_tokens", c_int),
160 164
("image_max_tokens", c_int),
161 165
]

0 commit comments

Comments
 (0)