@@ -114,7 +114,9 @@ class llama_context_params(Structure):
# Model file types, wrapped as C ints so they can be passed straight to the
# C API.
LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
# tok_embeddings.weight and output.weight are F16
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)
118120
119121# Functions
120122
@@ -175,6 +177,22 @@ def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_
175177_lib .llama_model_quantize .restype = c_int
176178
177179
# Apply a LoRA adapter to a loaded model.
# path_base_model is the path to a higher-quality model to use as a base for
# the layers modified by the adapter. It can be NULL (None from Python) to use
# the currently loaded model.
# The model needs to be reloaded before applying a new adapter; otherwise the
# adapter will be applied on top of the previous one.
# Returns 0 on success.
# NOTE(review): path_base_model is annotated as bytes although NULL/None is
# documented as valid -- consider Optional[bytes] once typing is imported.
def llama_apply_lora_from_file(
    ctx: llama_context_p,
    path_lora: bytes,
    path_base_model: bytes,
    n_threads: c_int,
) -> c_int:
    return _lib.llama_apply_lora_from_file(
        ctx, path_lora, path_base_model, n_threads
    )
190+
191+
# Declare the C signature so ctypes converts the arguments and the int return
# value correctly: (context, lora path, base model path, thread count) -> int.
_lib.llama_apply_lora_from_file.argtypes = [
    llama_context_p,
    c_char_p,
    c_char_p,
    c_int,
]
_lib.llama_apply_lora_from_file.restype = c_int
194+
195+
178196# Returns the KV cache that will contain the context for the
179197# ongoing prediction with the model.
180198def llama_get_kv_cache (ctx : llama_context_p ):
0 commit comments