Skip to content

Commit e3489ae

Browse files
committed
refactor(utils): lazy load optional dependencies — implement lazy loading for the tiktoken and modelscope libraries
1 parent 03ebfb7 commit e3489ae

File tree

1 file changed

+43
-13
lines changed

1 file changed

+43
-13
lines changed

flowllm/utils/token_utils.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,46 @@
1+
from typing import TYPE_CHECKING, Any
2+
13
from loguru import logger
24

3-
try:
5+
if TYPE_CHECKING:
46
import tiktoken
7+
from modelscope import AutoTokenizer as ModelScopeTokenizer
58

6-
TIKTOKEN_AVAILABLE = True
7-
except ImportError:
8-
TIKTOKEN_AVAILABLE = False
9-
logger.exception("tiktoken not installed. OpenAI models will use character-based estimation.")
9+
# Lazy import flags
10+
_TIKTOKEN_CHECKED = False
11+
_TIKTOKEN_AVAILABLE = False
12+
_MODELSCOPE_CHECKED = False
13+
_MODELSCOPE_AVAILABLE = False
1014

11-
try:
12-
from modelscope import AutoTokenizer as ModelScopeTokenizer
1315

14-
MODELSCOPE_AVAILABLE = True
15-
except ImportError as e:
16-
MODELSCOPE_AVAILABLE = False
17-
logger.exception("modelscope not installed. Qwen models will use character-based estimation.")
16+
def _check_tiktoken() -> bool:
17+
"""Lazy check for tiktoken availability."""
18+
global _TIKTOKEN_CHECKED, _TIKTOKEN_AVAILABLE
19+
if not _TIKTOKEN_CHECKED:
20+
try:
21+
import tiktoken # noqa: F401
22+
23+
_TIKTOKEN_AVAILABLE = True
24+
except ImportError:
25+
_TIKTOKEN_AVAILABLE = False
26+
logger.debug("tiktoken not installed. OpenAI models will use character-based estimation.")
27+
_TIKTOKEN_CHECKED = True
28+
return _TIKTOKEN_AVAILABLE
29+
30+
31+
def _check_modelscope() -> bool:
32+
"""Lazy check for modelscope availability."""
33+
global _MODELSCOPE_CHECKED, _MODELSCOPE_AVAILABLE
34+
if not _MODELSCOPE_CHECKED:
35+
try:
36+
from modelscope import AutoTokenizer # noqa: F401
37+
38+
_MODELSCOPE_AVAILABLE = True
39+
except ImportError:
40+
_MODELSCOPE_AVAILABLE = False
41+
logger.debug("modelscope not installed. Qwen models will use character-based estimation.")
42+
_MODELSCOPE_CHECKED = True
43+
return _MODELSCOPE_AVAILABLE
1844

1945

2046
class TokenCounter:
@@ -72,11 +98,13 @@ def count(self, text: str, model_name: str = None) -> int:
7298

7399
def _count_with_tiktoken(self, text: str, model_name: str) -> int:
74100
"""Count tokens using tiktoken (for GPT models)."""
75-
if not TIKTOKEN_AVAILABLE:
101+
if not _check_tiktoken():
76102
logger.debug(f"tiktoken not available, using fallback for {model_name}")
77103
return self._estimate_from_chars(text)
78104

79105
try:
106+
import tiktoken
107+
80108
# Get encoding for the model
81109
if model_name.startswith("gpt-4o"):
82110
encoding_name = "o200k_base"
@@ -102,11 +130,13 @@ def _count_with_qwen_tokenizer(self, text: str, model_name: str) -> int:
102130
103131
Expects a direct ModelScope path: "qwen/Qwen2.5-72B-Instruct"
104132
"""
105-
if not MODELSCOPE_AVAILABLE:
133+
if not _check_modelscope():
106134
logger.debug(f"modelscope not available, using fallback for {model_name}")
107135
return self._estimate_from_chars(text)
108136

109137
try:
138+
from modelscope import AutoTokenizer as ModelScopeTokenizer
139+
110140
# Use model_name directly as ModelScope path
111141
ms_model = model_name
112142

0 commit comments

Comments (0)