fix: resolve DeepSeek tool_calls args parsing validation error

Added ToolCallArgsParsingWrapper to handle AI providers (like DeepSeek)
that return tool_calls.args as JSON strings instead of dictionaries.

The wrapper monkey-patches ChatOpenAI's _create_chat_result method to
parse string arguments before AIMessage construction, preventing
Pydantic validation errors.

Changes:
- New: agent/chat_model_wrapper.py - Wrapper implementation
- Modified: agent/base_agent/base_agent.py - Wrap model during init
- Modified: CHANGELOG.md - Document fix as v0.4.1
- New: tests/unit/test_chat_model_wrapper.py - Unit tests

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-05 20:57:17 -05:00
parent e20dce7432
commit 3e50868a4d
4 changed files with 371 additions and 4 deletions

View File

@@ -33,6 +33,7 @@ from tools.deployment_config import (
from agent.context_injector import ContextInjector
from agent.pnl_calculator import DailyPnLCalculator
from agent.reasoning_summarizer import ReasoningSummarizer
from agent.chat_model_wrapper import ToolCallArgsParsingWrapper
# Load environment variables
load_dotenv()
@@ -208,10 +209,10 @@ class BaseAgent:
# Create AI model (mock in DEV mode, real in PROD mode)
if is_dev_mode():
from agent.mock_provider import MockChatModel
self.model = MockChatModel(date="2025-01-01") # Date will be updated per session
base_model = MockChatModel(date="2025-01-01") # Date will be updated per session
print(f"🤖 Using MockChatModel (DEV mode)")
else:
self.model = ChatOpenAI(
base_model = ChatOpenAI(
model=self.basemodel,
base_url=self.openai_base_url,
api_key=self.openai_api_key,
@@ -219,6 +220,10 @@ class BaseAgent:
timeout=30
)
print(f"🤖 Using {self.basemodel} (PROD mode)")
# Wrap model to fix tool_calls args parsing
self.model = ToolCallArgsParsingWrapper(model=base_model)
print(f"✅ Applied tool_calls args parsing wrapper")
except Exception as e:
raise RuntimeError(f"❌ Failed to initialize AI model: {e}")
@@ -541,7 +546,7 @@ Summary:"""
# Update mock model date if in dev mode
if is_dev_mode():
self.model.date = today_date
self.model.wrapped_model.date = today_date
# Get job_id from context injector
job_id = self.context_injector.job_id if self.context_injector else get_config_value("JOB_ID")

View File

@@ -0,0 +1,98 @@
"""
Chat model wrapper to fix tool_calls args parsing issues.
Some AI providers (like DeepSeek) return tool_calls.args as JSON strings instead
of dictionaries, causing Pydantic validation errors. This wrapper monkey-patches
the model to fix args before AIMessage construction.
"""
import json
from typing import Any, List, Optional, Dict
from functools import wraps
from langchain_core.messages import AIMessage, BaseMessage
class ToolCallArgsParsingWrapper:
    """
    Wrapper around ChatOpenAI that fixes tool_calls args parsing.

    Some providers (e.g. DeepSeek) return ``tool_calls[].function.arguments``
    as a JSON string instead of a dictionary, which triggers the Pydantic
    validation error:

        "Input should be a valid dictionary [type=dict_type, input_value='...', input_type=str]"

    Works by monkey-patching the wrapped model's ``_create_chat_result`` so
    string arguments are parsed into dicts before AIMessage construction.
    All other attribute access is proxied to the wrapped model.
    """

    def __init__(self, model: Any, **kwargs):
        """
        Initialize wrapper around a chat model.

        Args:
            model: The chat model to wrap (should be a ChatOpenAI instance;
                models without ``_create_chat_result`` are left unpatched).
            **kwargs: Additional parameters (ignored, for compatibility).
        """
        self.wrapped_model = model
        self._patch_model()

    def _patch_model(self) -> None:
        """Monkey-patch the model's _create_chat_result to fix tool_calls args.

        No-op when the model lacks the method (e.g. MockChatModel) or has
        already been patched — wrapping the same model instance twice must
        not stack a second JSON-parsing layer on top of the first.
        """
        if not hasattr(self.wrapped_model, '_create_chat_result'):
            # Model doesn't have this method (e.g., MockChatModel), skip patching
            return
        original_create_chat_result = self.wrapped_model._create_chat_result
        if getattr(original_create_chat_result, '_tool_args_patched', False):
            # Already patched (model wrapped more than once) — keep idempotent.
            return

        @wraps(original_create_chat_result)
        def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
            """Parse string tool-call arguments into dicts, then delegate."""
            # Normalize the provider response to a plain dict so it can be
            # rewritten in place; ChatOpenAI accepts a dict response too.
            response_dict = response if isinstance(response, dict) else response.model_dump()
            for choice in response_dict.get('choices') or []:
                # Providers may send message/tool_calls as null — treat as empty.
                message = choice.get('message') or {}
                for tool_call in message.get('tool_calls') or []:
                    function = tool_call.get('function') or {}
                    args = function.get('arguments')
                    if isinstance(args, str):
                        try:
                            function['arguments'] = json.loads(args)
                        except json.JSONDecodeError:
                            # Keep as string if parsing fails — better to let the
                            # downstream validator report it than to crash here.
                            pass
            # Call original method with fixed response
            return original_create_chat_result(response_dict, generation_info)

        # Marker lets a second wrapper detect and skip re-patching.
        patched_create_chat_result._tool_args_patched = True
        self.wrapped_model._create_chat_result = patched_create_chat_result

    @property
    def _llm_type(self) -> str:
        """Return identifier for this LLM type."""
        if hasattr(self.wrapped_model, '_llm_type'):
            return f"wrapped-{self.wrapped_model._llm_type}"
        return "wrapped-chat-model"

    def __getattr__(self, name: str):
        """Proxy all other attributes/methods to the wrapped model.

        Fetches ``wrapped_model`` via ``object.__getattribute__`` to avoid
        infinite recursion when the attribute is missing (e.g. during
        unpickling/copying, before __init__ has run).
        """
        try:
            wrapped = object.__getattribute__(self, 'wrapped_model')
        except AttributeError:
            raise AttributeError(name) from None
        return getattr(wrapped, name)

    def bind_tools(self, tools: Any, **kwargs):
        """
        Bind tools to the wrapped model.

        Since we patch the model in-place, we can just delegate to the wrapped model.
        """
        return self.wrapped_model.bind_tools(tools, **kwargs)

    def bind(self, **kwargs):
        """
        Bind settings to the wrapped model.

        Since we patch the model in-place, we can just delegate to the wrapped model.
        """
        return self.wrapped_model.bind(**kwargs)