Skip to content

Commit ed920b3

Browse files
committed
fix stats
1 parent c3c6d45 commit ed920b3

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

src/inferencesh/models/llm.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,16 +236,22 @@ def __init__(self):
236236

237237
def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
238238
"""Update response state from a chunk."""
239+
print("DEBUG: Entering update_from_chunk")
240+
print(f"DEBUG: Current usage stats: {self.usage_stats}")
241+
print(f"DEBUG: Chunk: {chunk}")
242+
239243
# Update usage stats if present
240244
if "usage" in chunk:
241245
usage = chunk["usage"]
242246
if usage is not None:
247+
print(f"DEBUG: Updating usage stats with: {usage}")
243248
# Update usage stats preserving existing values if not provided
244249
self.usage_stats.update({
245250
"prompt_tokens": usage.get("prompt_tokens", self.usage_stats["prompt_tokens"]),
246251
"completion_tokens": usage.get("completion_tokens", self.usage_stats["completion_tokens"]),
247252
"total_tokens": usage.get("total_tokens", self.usage_stats["total_tokens"])
248253
})
254+
print(f"DEBUG: Updated usage stats: {self.usage_stats}")
249255

250256
# Get the delta from the chunk
251257
delta = chunk.get("choices", [{}])[0]
@@ -284,6 +290,8 @@ def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
284290
self.timing_stats["tokens_per_second"] = (
285291
self.usage_stats["completion_tokens"] / timing_stats["generation_time"]
286292
)
293+
294+
print(f"DEBUG: Final usage stats in update_from_chunk: {self.usage_stats}")
287295

288296
def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
289297
"""Update tool calls, handling both full and partial updates."""
@@ -315,17 +323,23 @@ def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
315323

316324
def has_updates(self) -> bool:
    """Return True if this response carries any content or tool-call updates.

    A response counts as updated when either ``self.content`` or
    ``self.tool_calls`` is truthy; an empty string / empty list means
    there is nothing new to emit.

    Returns:
        bool: True if there is pending content or at least one tool call.
    """
    # Fix: removed the leftover DEBUG print (stdout noise in library code)
    # and the temporary variable that shadowed this method's own name.
    return bool(self.content) or bool(self.tool_calls)
319329

320330
def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
321331
"""Convert current state to LLMOutput."""
332+
print("DEBUG: Entering to_output")
333+
print(f"DEBUG: Usage stats before conversion: {self.usage_stats}")
334+
322335
buffer, output, _ = transformer(self.content, buffer)
323336

324337
# Add tool calls if present
325338
if self.tool_calls:
326339
output.tool_calls = self.tool_calls
327340

328341
# Add usage stats
342+
print(f"DEBUG: Creating LLMUsage with stats: {self.usage_stats}")
329343
output.usage = LLMUsage(
330344
stop_reason=self.usage_stats["stop_reason"],
331345
time_to_first_token=self.timing_stats["time_to_first_token"] or 0.0,
@@ -336,6 +350,7 @@ def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
336350
reasoning_time=self.timing_stats["reasoning_time"],
337351
reasoning_tokens=self.timing_stats["reasoning_tokens"]
338352
)
353+
print(f"DEBUG: Created output usage: {output.usage}")
339354

340355
return output, buffer
341356

0 commit comments

Comments (0)