Skip to content

Commit 4db148a

Browse files
committed
Fix false chunk timeout raised before the first LLM chunk arrives
1 parent 37fff3d commit 4db148a

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/inferencesh/models/llm.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -650,6 +650,7 @@ def _generate_worker():
650650
last_activity = time.time()
651651
init_timeout = 30.0 # 30 seconds for initial response
652652
chunk_timeout = 10.0 # 10 seconds between chunks
653+
chunks_begun = False
653654

654655
try:
655656
# Wait for initial setup
@@ -680,7 +681,7 @@ def _generate_worker():
680681
pass
681682

682683
# Check for timeout
683-
if time.time() - last_activity > chunk_timeout:
684+
if chunks_begun and time.time() - last_activity > chunk_timeout:
684685
raise RuntimeError(f"No response from model for {chunk_timeout} seconds")
685686

686687
# Get next chunk
@@ -705,6 +706,8 @@ def _generate_worker():
705706
if not timing.first_token_time:
706707
timing.mark_first_token()
707708

709+
chunks_begun = True
710+
708711
# Update response state from chunk
709712
response.update_from_chunk(chunk, timing)
710713

0 commit comments

Comments
 (0)