@@ -39,6 +39,7 @@ def __init__(
         n_threads: Optional[int] = None,
         n_batch: int = 8,
         last_n_tokens_size: int = 64,
+        lora_path: Optional[str] = None,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -57,6 +58,7 @@ def __init__(
             n_threads: Number of threads to use. If None, the number of threads is automatically determined.
             n_batch: Maximum number of prompt tokens to batch together when calling llama_eval.
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
+            lora_path: Path to a LoRA file to apply to the model.
             verbose: Print verbose output to stderr.

         Raises:
@@ -108,6 +110,17 @@ def __init__(
             self.model_path.encode("utf-8"), self.params
         )

+        self.lora_path = None
+        if lora_path:
+            self.lora_path = lora_path
+            if llama_cpp.llama_apply_lora_from_file(
+                self.ctx,
+                self.lora_path.encode("utf-8"),
+                self.model_path.encode("utf-8"),
+                llama_cpp.c_int(self.n_threads),
+            ):
+                raise RuntimeError(f"Failed to apply LoRA from path: {self.lora_path}")
+
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)

@@ -802,6 +815,7 @@ def __getstate__(self):
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
+            lora_path=self.lora_path,
         )

     def __setstate__(self, state):
@@ -819,6 +833,7 @@ def __setstate__(self, state):
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
+            lora_path=state["lora_path"],
             verbose=state["verbose"],
         )

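With this change, a LoRA adapter can be applied at model load time by passing `lora_path`, and because `__getstate__`/`__setstate__` now carry the path, it survives pickling. A minimal usage sketch follows; the model and adapter paths, the prompt, and the `max_tokens` value are placeholders, assuming a local GGML base model and a LoRA adapter file converted for llama.cpp:

```python
import pickle

from llama_cpp import Llama

# Placeholder paths: substitute a real GGML base model and a compatible
# llama.cpp LoRA adapter. If llama_apply_lora_from_file reports failure,
# __init__ raises RuntimeError, so a bad adapter fails fast at load time.
llm = Llama(
    model_path="./models/7B/ggml-model-q4_0.bin",
    lora_path="./loras/ggml-adapter-model.bin",
)

output = llm("Q: What does a LoRA adapter change? A:", max_tokens=32)
print(output["choices"][0]["text"])

# Round-tripping through pickle rebuilds the object via __init__, and the
# new lora_path entry in the state dict means the adapter is re-applied.
llm2 = pickle.loads(pickle.dumps(llm))
```

Note that since `__setstate__` delegates to `__init__`, unpickling reloads the weights from `model_path` and re-applies the adapter rather than restoring in-memory state.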