 from collections import deque, OrderedDict

 import diskcache
+import ctypes

 from . import llama_cpp
 from .llama_types import *

 import numpy as np
 import numpy.typing as npt

-
 class BaseLlamaCache(ABC):
     """Base cache class for a llama.cpp model."""

@@ -207,6 +207,7 @@ def __init__(
         n_ctx: int = 512,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
+        tensor_split: list[float] = None,
         seed: int = 1337,
         f16_kv: bool = True,
         logits_all: bool = False,
@@ -248,12 +249,20 @@ def __init__(
         Returns:
             A Llama instance.
         """
+        if tensor_split is None:
+            tensor_split = [0.0] * llama_cpp.LLAMA_MAX_DEVICES.value
+
+        # Convert to a ctypes float array, padded with zeros up to LLAMA_MAX_DEVICES entries
+        FloatArray = ctypes.c_float * llama_cpp.LLAMA_MAX_DEVICES.value
+        c_tensor_split = FloatArray(*tensor_split)
+
         self.verbose = verbose
         self.model_path = model_path

         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
         self.params.n_gpu_layers = n_gpu_layers
+        self.params.tensor_split = c_tensor_split
         self.params.seed = seed
         self.params.f16_kv = f16_kv
         self.params.logits_all = logits_all
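
The block added above converts the Python-level tensor_split list into a fixed-length C float array before handing it to llama.cpp. The same ctypes pattern in isolation, as a minimal sketch (MAX_DEVICES below is a hypothetical stand-in for llama_cpp.LLAMA_MAX_DEVICES.value):

import ctypes

MAX_DEVICES = 4  # hypothetical stand-in for llama_cpp.LLAMA_MAX_DEVICES.value

# Multiplying a ctypes type by an int produces a fixed-size array type.
FloatArray = ctypes.c_float * MAX_DEVICES

# Fewer initializers than the array length are allowed; remaining slots stay 0.0.
c_tensor_split = FloatArray(*[0.7, 0.3])

print(list(c_tensor_split))  # ~[0.7, 0.3, 0.0, 0.0] (stored as 32-bit C floats, so values are approximate)
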
@@ -1490,6 +1499,7 @@ def __getstate__(self):
             model_path=self.model_path,
             n_ctx=self.params.n_ctx,
             n_gpu_layers=self.params.n_gpu_layers,
+            tensor_split=self.params.tensor_split,
             seed=self.params.seed,
             f16_kv=self.params.f16_kv,
             logits_all=self.params.logits_all,
@@ -1514,6 +1524,7 @@ def __setstate__(self, state):
             n_ctx=state["n_ctx"],
             n_parts=state["n_parts"],
             n_gpu_layers=state["n_gpu_layers"],
+            tensor_split=state["tensor_split"],
             seed=state["seed"],
             f16_kv=state["f16_kv"],
             logits_all=state["logits_all"],
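
With these changes, callers pass tensor_split when constructing a Llama instance, and __getstate__/__setstate__ now include it so the setting is preserved across serialization. A usage sketch (the path and proportions below are illustrative, and the per-device proportions are assumed to be interpreted like llama.cpp's --tensor-split option):

from llama_cpp import Llama

llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # illustrative path
    n_gpu_layers=40,                          # offload layers to the GPU(s)
    tensor_split=[0.6, 0.4],                  # assumed: fraction of tensors placed on device 0 and device 1
)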