1+ from typing import TYPE_CHECKING , Any
2+
13from loguru import logger
24
3- try :
5+ if TYPE_CHECKING :
46 import tiktoken
7+ from modelscope import AutoTokenizer as ModelScopeTokenizer
58
6- TIKTOKEN_AVAILABLE = True
7- except ImportError :
8- TIKTOKEN_AVAILABLE = False
9- logger .exception ("tiktoken not installed. OpenAI models will use character-based estimation." )
9+ # Lazy import flags
10+ _TIKTOKEN_CHECKED = False
11+ _TIKTOKEN_AVAILABLE = False
12+ _MODELSCOPE_CHECKED = False
13+ _MODELSCOPE_AVAILABLE = False
1014
11- try :
12- from modelscope import AutoTokenizer as ModelScopeTokenizer
1315
14- MODELSCOPE_AVAILABLE = True
15- except ImportError as e :
16- MODELSCOPE_AVAILABLE = False
17- logger .exception ("modelscope not installed. Qwen models will use character-based estimation." )
def _check_tiktoken() -> bool:
    """Report whether tiktoken can be imported, probing at most once.

    The first call attempts the import and caches the outcome in the
    module-level ``_TIKTOKEN_CHECKED`` / ``_TIKTOKEN_AVAILABLE`` flags;
    every later call returns the cached result without re-importing.
    """
    global _TIKTOKEN_CHECKED, _TIKTOKEN_AVAILABLE
    # Fast path: the probe already ran, reuse its cached verdict.
    if _TIKTOKEN_CHECKED:
        return _TIKTOKEN_AVAILABLE
    try:
        import tiktoken  # noqa: F401

        _TIKTOKEN_AVAILABLE = True
    except ImportError:
        _TIKTOKEN_AVAILABLE = False
        # Logged once only, since the checked flag prevents re-entry here.
        logger.debug("tiktoken not installed. OpenAI models will use character-based estimation.")
    _TIKTOKEN_CHECKED = True
    return _TIKTOKEN_AVAILABLE
29+
30+
def _check_modelscope() -> bool:
    """Report whether modelscope can be imported, probing at most once.

    On the first invocation the import is attempted and the result is
    memoized in the module-level ``_MODELSCOPE_CHECKED`` /
    ``_MODELSCOPE_AVAILABLE`` flags; subsequent invocations simply
    return the memoized value.
    """
    global _MODELSCOPE_CHECKED, _MODELSCOPE_AVAILABLE
    # Return the memoized answer when the probe has already run.
    if _MODELSCOPE_CHECKED:
        return _MODELSCOPE_AVAILABLE
    try:
        from modelscope import AutoTokenizer  # noqa: F401

        _MODELSCOPE_AVAILABLE = True
    except ImportError:
        _MODELSCOPE_AVAILABLE = False
        # Emitted a single time; the checked flag guards against repeats.
        logger.debug("modelscope not installed. Qwen models will use character-based estimation.")
    _MODELSCOPE_CHECKED = True
    return _MODELSCOPE_AVAILABLE
1844
1945
2046class TokenCounter :
@@ -72,11 +98,13 @@ def count(self, text: str, model_name: str = None) -> int:
7298
7399 def _count_with_tiktoken (self , text : str , model_name : str ) -> int :
74100 """Count tokens using tiktoken (for GPT models)."""
75- if not TIKTOKEN_AVAILABLE :
101+ if not _check_tiktoken () :
76102 logger .debug (f"tiktoken not available, using fallback for { model_name } " )
77103 return self ._estimate_from_chars (text )
78104
79105 try :
106+ import tiktoken
107+
80108 # Get encoding for the model
81109 if model_name .startswith ("gpt-4o" ):
82110 encoding_name = "o200k_base"
@@ -102,11 +130,13 @@ def _count_with_qwen_tokenizer(self, text: str, model_name: str) -> int:
102130
103131 Expects a direct ModelScope path: "qwen/Qwen2.5-72B-Instruct"
104132 """
105- if not MODELSCOPE_AVAILABLE :
133+ if not _check_modelscope () :
106134 logger .debug (f"modelscope not available, using fallback for { model_name } " )
107135 return self ._estimate_from_chars (text )
108136
109137 try :
138+ from modelscope import AutoTokenizer as ModelScopeTokenizer
139+
110140 # Use model_name directly as ModelScope path
111141 ms_model = model_name
112142
0 commit comments