Skip to content

Commit ed920b3

Browse files
committed
fix stats
1 parent c3c6d45 commit ed920b3

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

src/inferencesh/models/llm.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,16 +236,22 @@ def __init__(self):
236236

237237
def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
238238
"""Update response state from a chunk."""
239+
print("DEBUG: Entering update_from_chunk")
240+
print(f"DEBUG: Current usage stats: {self.usage_stats}")
241+
print(f"DEBUG: Chunk: {chunk}")
242+
239243
# Update usage stats if present
240244
if "usage" in chunk:
241245
usage = chunk["usage"]
242246
if usage is not None:
247+
print(f"DEBUG: Updating usage stats with: {usage}")
243248
# Update usage stats preserving existing values if not provided
244249
self.usage_stats.update({
245250
"prompt_tokens": usage.get("prompt_tokens", self.usage_stats["prompt_tokens"]),
246251
"completion_tokens": usage.get("completion_tokens", self.usage_stats["completion_tokens"]),
247252
"total_tokens": usage.get("total_tokens", self.usage_stats["total_tokens"])
248253
})
254+
print(f"DEBUG: Updated usage stats: {self.usage_stats}")
249255

250256
# Get the delta from the chunk
251257
delta = chunk.get("choices", [{}])[0]
@@ -284,6 +290,8 @@ def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
284290
self.timing_stats["tokens_per_second"] = (
285291
self.usage_stats["completion_tokens"] / timing_stats["generation_time"]
286292
)
293+
294+
print(f"DEBUG: Final usage stats in update_from_chunk: {self.usage_stats}")
287295

288296
def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
289297
"""Update tool calls, handling both full and partial updates."""
@@ -315,17 +323,23 @@ def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
315323

316324
def has_updates(self) -> bool:
    """Return True if this response carries any content or tool-call updates.

    A response counts as updated when either ``self.content`` or
    ``self.tool_calls`` is truthy; an empty string / empty list means
    there is nothing new to emit.

    Returns:
        bool: True if there is pending content or at least one tool call.
    """
    # Fix: removed the leftover DEBUG print (stdout noise in library code)
    # and the temporary variable that shadowed this method's own name.
    return bool(self.content) or bool(self.tool_calls)
319329

320330
def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
321331
"""Convert current state to LLMOutput."""
332+
print("DEBUG: Entering to_output")
333+
print(f"DEBUG: Usage stats before conversion: {self.usage_stats}")
334+
322335
buffer, output, _ = transformer(self.content, buffer)
323336

324337
# Add tool calls if present
325338
if self.tool_calls:
326339
output.tool_calls = self.tool_calls
327340

328341
# Add usage stats
342+
print(f"DEBUG: Creating LLMUsage with stats: {self.usage_stats}")
329343
output.usage = LLMUsage(
330344
stop_reason=self.usage_stats["stop_reason"],
331345
time_to_first_token=self.timing_stats["time_to_first_token"] or 0.0,
@@ -336,6 +350,7 @@ def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
336350
reasoning_time=self.timing_stats["reasoning_time"],
337351
reasoning_tokens=self.timing_stats["reasoning_tokens"]
338352
)
353+
print(f"DEBUG: Created output usage: {output.usage}")
339354

340355
return output, buffer
341356

0 commit comments

Comments (0)