fix: normalize DeepSeek non-standard tool_calls format

Systematic debugging revealed that DeepSeek returns tool_calls in a non-standard format that bypasses LangChain's parse_tool_call().

**Root Cause:**
- OpenAI standard: {function: {name, arguments}, id}
- DeepSeek format: {name, args, id}
- LangChain's parse_tool_call() returns None when there is no 'function' key
- Result: raw tool_call with string args → Pydantic validation error

**Solution:**
- ToolCallArgsParsingWrapper detects the non-standard format
- Normalizes to the OpenAI standard before LangChain processing
- Converts {name, args, id} → {function: {name, arguments}, id}
- Added diagnostic logging to identify format variations

**Impact:**
- DeepSeek models now work via OpenRouter
- No breaking changes to other providers (defensive design)
- Diagnostic logs help debug future format issues

Fixes validation errors:
tool_calls.0.args: Input should be a valid dictionary [type=dict_type, input_value='{"symbol": "GILD", ...}', input_type=str]
2026-04-09 12:17:24 -04:00 · 2025-11-06 11:38:35 -05:00
parent 2d41717b2b
commit 7b35394ce7
4 changed files with 177 additions and 15 deletions
--- a/agent/chat_model_wrapper.py
+++ b/agent/chat_model_wrapper.py
@@ -1,24 +1,18 @@
 """
-Chat model wrapper - Passthrough wrapper for ChatOpenAI models.
+Chat model wrapper to fix tool_calls args parsing issues.

-Originally created to fix DeepSeek tool_calls arg parsing issues, but investigation
-revealed DeepSeek already returns the correct format (arguments as JSON strings).
-
-This wrapper is now a simple passthrough that proxies all calls to the underlying model.
-Kept for backward compatibility and potential future use.
+DeepSeek and other providers may return tool_calls as {name, args, id} instead of
+the OpenAI {function: {name, arguments}, id} shape; this wrapper normalizes them.
 """

-from typing import Any
+import json
+from typing import Any, Optional, Dict
+from functools import wraps


 class ToolCallArgsParsingWrapper:
    """
-    Passthrough wrapper around ChatOpenAI models.
-
-    After systematic debugging, determined that DeepSeek returns tool_calls.arguments
-    as JSON strings (correct format), so no parsing/conversion is needed.
-
-    This wrapper simply proxies all calls to the wrapped model.
+    Wrapper that adds diagnostic logging and fixes tool_calls args if needed.
    """

    def __init__(self, model: Any, **kwargs):
@@ -30,6 +24,92 @@ class ToolCallArgsParsingWrapper:
            **kwargs: Additional parameters (ignored, for compatibility)
        """
        self.wrapped_model = model
+        self._patch_model()
+
+    def _patch_model(self):
+        """Monkey-patch the wrapped model's _create_chat_result to log diagnostics and normalize tool_calls."""
+        if not hasattr(self.wrapped_model, '_create_chat_result'):
+            # Model doesn't have this method (e.g., MockChatModel), skip patching
+            return
+
+        original_create_chat_result = self.wrapped_model._create_chat_result
+
+        @wraps(original_create_chat_result)
+        def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
+            """Print the response structure, rewrite non-standard tool_calls in place, then delegate to the original."""
+            response_dict = response if isinstance(response, dict) else response.model_dump()  # dicts are used (and mutated) as-is
+
+            # DIAGNOSTIC: Print the key structure of the response; only the first choice is inspected here
+            print(f"\n[DIAGNOSTIC] Response structure:")
+            print(f"  Response keys: {list(response_dict.keys())}")
+
+            if 'choices' in response_dict and response_dict['choices']:
+                choice = response_dict['choices'][0]
+                print(f"  Choice keys: {list(choice.keys())}")
+
+                if 'message' in choice:
+                    message = choice['message']
+                    print(f"  Message keys: {list(message.keys())}")
+
+                    if 'tool_calls' in message and message['tool_calls']:
+                        print(f"  tool_calls count: {len(message['tool_calls'])}")
+                        for i, tc in enumerate(message['tool_calls'][:2]):  # Show first 2
+                            print(f"  tool_calls[{i}] keys: {list(tc.keys())}")
+                            if 'function' in tc:
+                                print(f"    function keys: {list(tc['function'].keys())}")
+                                if 'arguments' in tc['function']:
+                                    args = tc['function']['arguments']
+                                    print(f"    arguments type: {type(args).__name__}")
+                                    print(f"    arguments value (first 100 chars): {str(args)[:100]}")
+
+            # Fix tool_calls: Normalize to OpenAI format if needed (unlike the diagnostics, this visits every choice)
+            if 'choices' in response_dict:
+                for choice in response_dict['choices']:
+                    if 'message' not in choice:
+                        continue
+
+                    message = choice['message']
+
+                    # Fix tool_calls: Ensure standard OpenAI format {function: {name, arguments}, ...}
+                    if 'tool_calls' in message and message['tool_calls']:
+                        print(f"[DIAGNOSTIC] Processing {len(message['tool_calls'])} tool_calls...")
+                        for idx, tool_call in enumerate(message['tool_calls']):
+                            # Non-standard format: 'args' sits directly on the tool call and there is no 'function' key
+                            if 'args' in tool_call and 'function' not in tool_call:
+                                print(f"[DIAGNOSTIC] tool_calls[{idx}] has non-standard format (direct args)")
+                                # Build the 'function' entry; string args pass through, anything else is JSON-serialized
+                                args = tool_call['args']
+                                tool_call['function'] = {
+                                    'name': tool_call.get('name', ''),
+                                    'arguments': args if isinstance(args, str) else json.dumps(args)  # NOTE(review): json.dumps is unguarded here
+                                }
+                                # Remove non-standard fields now that they live under 'function'
+                                if 'name' in tool_call:
+                                    del tool_call['name']
+                                if 'args' in tool_call:
+                                    del tool_call['args']
+                                print(f"[DIAGNOSTIC] Converted tool_calls[{idx}] to standard OpenAI format")
+
+                    # Fix invalid_tool_calls: dict args -> JSON string (non-dict args are left untouched)
+                    if 'invalid_tool_calls' in message and message['invalid_tool_calls']:
+                        print(f"[DIAGNOSTIC] Checking invalid_tool_calls for dict-to-string conversion...")
+                        for idx, invalid_call in enumerate(message['invalid_tool_calls']):
+                            if 'args' in invalid_call:
+                                args = invalid_call['args']
+                                # Convert dict arguments to JSON string
+                                if isinstance(args, dict):
+                                    try:
+                                        invalid_call['args'] = json.dumps(args)
+                                        print(f"[DIAGNOSTIC] Converted invalid_tool_calls[{idx}].args from dict to string")
+                                    except (TypeError, ValueError) as e:
+                                        print(f"[DIAGNOSTIC] Failed to serialize invalid_tool_calls[{idx}].args: {e}")
+                                        # Keep as-is if serialization fails
+
+            # Call original method with the fixed response (always the dict form, even if an object was passed in)
+            return original_create_chat_result(response_dict, generation_info)
+
+        # Replace the method on this model instance so later calls go through the patched version
+        self.wrapped_model._create_chat_result = patched_create_chat_result

    @property
    def _llm_type(self) -> str: