@@ -581,6 +581,7 @@ class llama_model_params(ctypes.Structure):
 # bool embeddings; // if true, extract embeddings (together with logits)
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU

+
 # // Abort callback
 # // if it returns true, execution of llama_decode() will be aborted
 # // currently works only with CPU execution
@@ -1006,6 +1007,11 @@ def llama_n_ctx_train(model: llama_model_p, /) -> int: ...
 def llama_n_embd(model: llama_model_p, /) -> int: ...


+# LLAMA_API int32_t llama_n_layer (const struct llama_model * model);
+@ctypes_function("llama_n_layer", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_n_layer(model: llama_model_p, /) -> int: ...
+
+
 # // Get the model's RoPE frequency scaling factor
 # LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 @ctypes_function("llama_rope_freq_scale_train", [llama_model_p_ctypes], ctypes.c_float)
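For context, a minimal usage sketch of the new llama_n_layer binding through this module's low-level loader helpers; the GGUF path is a placeholder, error handling is kept minimal, and the no-argument llama_backend_init call is an assumption about the current module version:

import llama_cpp

llama_cpp.llama_backend_init()

# Load a model via the low-level API (the path below is a placeholder).
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)
if not model:
    raise RuntimeError("failed to load model")

# llama_n_layer is the binding added in this hunk; llama_n_embd already existed.
print("n_layer =", llama_cpp.llama_n_layer(model))
print("n_embd  =", llama_cpp.llama_n_embd(model))

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()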
@@ -1166,12 +1172,18 @@ def llama_model_quantize(
     ...


+# // Apply a LoRA adapter to a loaded model
+# // path_base_model is the path to a higher quality model to use as a base for
+# // the layers modified by the adapter. Can be NULL to use the current loaded model.
+# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# // will be applied on top of the previous one
+# // Returns 0 on success
 # LLAMA_API int32_t llama_model_apply_lora_from_file(
 # const struct llama_model * model,
-# const char * path_lora,
-# float scale,
-# const char * path_base_model,
-# int32_t n_threads);
+# const char * path_lora,
+# float scale,
+# const char * path_base_model,
+# int32_t n_threads);
 @ctypes_function(
     "llama_model_apply_lora_from_file",
     [
@@ -1190,7 +1202,57 @@ def llama_model_apply_lora_from_file(
     path_base_model: Union[ctypes.c_char_p, bytes, None],
     n_threads: Union[ctypes.c_int32, int],
     /,
-) -> int: ...
+) -> int:
+    """Apply a LoRA adapter to a loaded model
+    path_base_model is the path to a higher quality model to use as a base for
+    the layers modified by the adapter. Can be NULL to use the current loaded model.
+    The model needs to be reloaded before applying a new adapter, otherwise the adapter
+    will be applied on top of the previous one
+    Returns 0 on success"""
+    ...
+
+
+# // Apply a loaded control vector to a llama_context, or if data is NULL, clear
+# // the currently loaded vector.
+# // n_embd should be the size of a single layer's control, and data should point
+# // to an n_embd x n_layers buffer starting from layer 1.
+# // il_start and il_end are the layer range the vector should apply to (both inclusive)
+# // See llama_control_vector_load in common to load a control vector.
+# LLAMA_API int32_t llama_control_vector_apply(
+# struct llama_context * lctx,
+# const float * data,
+# size_t len,
+# int32_t n_embd,
+# int32_t il_start,
+# int32_t il_end);
+@ctypes_function(
+    "llama_control_vector_apply",
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_float),
+        ctypes.c_size_t,
+        ctypes.c_int32,
+        ctypes.c_int32,
+        ctypes.c_int32,
+    ],
+    ctypes.c_int32,
+)
+def llama_control_vector_apply(
+    lctx: llama_context_p,
+    data: CtypesPointerOrRef[ctypes.c_float],
+    len: int,
+    n_embd: int,
+    il_start: int,
+    il_end: int,
+    /,
+) -> int:
+    """Apply a loaded control vector to a llama_context, or if data is NULL, clear
+    the currently loaded vector.
+    n_embd should be the size of a single layer's control, and data should point
+    to an n_embd x n_layers buffer starting from layer 1.
+    il_start and il_end are the layer range the vector should apply to (both inclusive)
+    See llama_control_vector_load in common to load a control vector."""
+    ...


 # //
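Since the new entry point expects a flat n_embd x n_layers float buffer starting from layer 1, here is a rough usage sketch that continues from the loading sketch above. The zero-filled buffer is a placeholder (a real control vector would be loaded from a file, e.g. via llama.cpp's llama_control_vector_load helper mentioned in the comment), and the context-creation calls are assumed to match the rest of this module:

import ctypes
import llama_cpp

# Continuing from the earlier sketch: `model` was loaded with llama_load_model_from_file.
cparams = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_new_context_with_model(model, cparams)

n_embd = llama_cpp.llama_n_embd(model)
n_layer = llama_cpp.llama_n_layer(model)

# Placeholder control data: one n_embd block per layer, starting from layer 1.
# Real values would come from a trained control vector; zeros are a no-op direction.
data = (ctypes.c_float * (n_embd * n_layer))()

ret = llama_cpp.llama_control_vector_apply(
    ctx,
    data,        # pointer to n_embd * n_layer floats
    len(data),   # total number of floats in the buffer
    n_embd,      # size of a single layer's control
    1,           # il_start (inclusive)
    n_layer,     # il_end (inclusive)
)
if ret != 0:
    raise RuntimeError("llama_control_vector_apply failed")

# Per the header comment, passing NULL data clears the currently loaded vector.
llama_cpp.llama_control_vector_apply(ctx, None, 0, n_embd, 1, n_layer)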
@@ -1205,6 +1267,12 @@ def llama_model_apply_lora_from_file(
 # llama_pos pos;
 # };
 class llama_kv_cache_view_cell(ctypes.Structure):
+    """Information associated with an individual cell in the KV cache view.
+
+    Attributes:
+        pos (llama_pos): The position for this cell. Takes KV cache shifts into account.
+            May be negative if the cell is not populated."""
+
     _fields_ = [("pos", llama_pos)]


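For context, a rough sketch of how this cell struct is typically consumed through the companion llama_kv_cache_view_init / llama_kv_cache_view_update / llama_kv_cache_view_free bindings defined elsewhere in this module; the exact signatures and the n_cells / cells field names follow llama.h and should be treated as assumptions here:

import ctypes
import llama_cpp

# Assumes `ctx` is an existing llama_context (see the earlier sketches).
view = llama_cpp.llama_kv_cache_view_init(ctx, 1)   # track at most 1 sequence per cell
llama_cpp.llama_kv_cache_view_update(ctx, ctypes.byref(view))

# Each cell exposes `pos`; a negative position means the cell is not populated,
# as the docstring above states.
used = sum(1 for i in range(view.n_cells) if view.cells[i].pos >= 0)
print(f"{used} of {view.n_cells} KV cells populated")

llama_cpp.llama_kv_cache_view_free(ctypes.byref(view))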
@@ -1985,7 +2053,7 @@ def llama_tokenize(
     /,
 ) -> int:
     """Convert the provided text into tokens.
-
+
     Args:
         model: The model to use for tokenization.
         text: The text to tokenize.
@@ -1995,10 +2063,11 @@ def llama_tokenize(
         add_bos: Whether to add a beginning-of-sentence token.
         special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
             Does not insert a leading space.
-
+
     Returns:
         Returns the number of tokens on success, no more than n_tokens_max
-        Returns a negative number on failure - the number of tokens that would have been returned"""
+        Returns a negative number on failure - the number of tokens that would have been returned
+    """
     ...


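Given the return convention spelled out above (a negative result means the buffer was too small and its magnitude is the required token count), a small sketch of the usual two-pass call pattern; the tokenize_bytes helper name and the 64-token first guess are illustrative assumptions, not part of the bindings:

import llama_cpp

def tokenize_bytes(model, text: bytes, add_bos: bool = True, special: bool = False):
    # First pass with a small buffer; if it comes back negative, resize to the
    # exact token count reported and call again.
    n_max = 64
    buf = (llama_cpp.llama_token * n_max)()
    n = llama_cpp.llama_tokenize(model, text, len(text), buf, n_max, add_bos, special)
    if n < 0:
        n_max = -n
        buf = (llama_cpp.llama_token * n_max)()
        n = llama_cpp.llama_tokenize(model, text, len(text), buf, n_max, add_bos, special)
    return list(buf[:n])

# tokens = tokenize_bytes(model, b"Hello, world!")  # `model` as in the earlier sketches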