fix: resolve DeepSeek tool_calls args parsing validation error

Added ToolCallArgsParsingWrapper to handle AI providers (like DeepSeek)
that return tool_calls.args as JSON strings instead of dictionaries.

The wrapper monkey-patches ChatOpenAI's _create_chat_result method to
parse string arguments before AIMessage construction, preventing
Pydantic validation errors.

Changes:
- New: agent/chat_model_wrapper.py - Wrapper implementation
- Modified: agent/base_agent/base_agent.py - Wrap model during init
- Modified: CHANGELOG.md - Document fix as v0.4.1
- New: tests/unit/test_chat_model_wrapper.py - Unit tests

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-05 20:57:17 -05:00
parent e20dce7432
commit 3e50868a4d
4 changed files with 371 additions and 4 deletions

View File

@@ -33,6 +33,7 @@ from tools.deployment_config import (
from agent.context_injector import ContextInjector
from agent.pnl_calculator import DailyPnLCalculator
from agent.reasoning_summarizer import ReasoningSummarizer
from agent.chat_model_wrapper import ToolCallArgsParsingWrapper
# Load environment variables
load_dotenv()
@@ -208,10 +209,10 @@ class BaseAgent:
# Create AI model (mock in DEV mode, real in PROD mode)
if is_dev_mode():
from agent.mock_provider import MockChatModel
self.model = MockChatModel(date="2025-01-01") # Date will be updated per session
base_model = MockChatModel(date="2025-01-01") # Date will be updated per session
print(f"🤖 Using MockChatModel (DEV mode)")
else:
self.model = ChatOpenAI(
base_model = ChatOpenAI(
model=self.basemodel,
base_url=self.openai_base_url,
api_key=self.openai_api_key,
@@ -219,6 +220,10 @@ class BaseAgent:
timeout=30
)
print(f"🤖 Using {self.basemodel} (PROD mode)")
# Wrap model to fix tool_calls args parsing
self.model = ToolCallArgsParsingWrapper(model=base_model)
print(f"✅ Applied tool_calls args parsing wrapper")
except Exception as e:
raise RuntimeError(f"❌ Failed to initialize AI model: {e}")
@@ -541,7 +546,7 @@ Summary:"""
# Update mock model date if in dev mode
if is_dev_mode():
self.model.date = today_date
self.model.wrapped_model.date = today_date
# Get job_id from context injector
job_id = self.context_injector.job_id if self.context_injector else get_config_value("JOB_ID")

View File

@@ -0,0 +1,98 @@
"""
Chat model wrapper to fix tool_calls args parsing issues.
Some AI providers (like DeepSeek) return tool_calls.args as JSON strings instead
of dictionaries, causing Pydantic validation errors. This wrapper monkey-patches
the model to fix args before AIMessage construction.
"""
import json
from typing import Any, List, Optional, Dict
from functools import wraps
from langchain_core.messages import AIMessage, BaseMessage
class ToolCallArgsParsingWrapper:
    """
    Wrapper around ChatOpenAI that fixes tool_calls args parsing.

    Some providers (e.g. DeepSeek) return ``tool_calls[].function.arguments``
    as a JSON string instead of a dictionary, which triggers the Pydantic
    validation error:

        "Input should be a valid dictionary [type=dict_type, input_value='...', input_type=str]"

    Works by monkey-patching the wrapped model's ``_create_chat_result`` so
    string arguments are parsed into dicts before AIMessage construction.
    All other attribute access is proxied to the wrapped model.
    """

    def __init__(self, model: Any, **kwargs):
        """
        Initialize wrapper around a chat model.

        Args:
            model: The chat model to wrap (should be a ChatOpenAI instance;
                models without ``_create_chat_result`` are left unpatched).
            **kwargs: Additional parameters (ignored, for compatibility).
        """
        self.wrapped_model = model
        self._patch_model()

    def _patch_model(self) -> None:
        """Monkey-patch the model's _create_chat_result to fix tool_calls args.

        No-op when the model lacks the method (e.g. MockChatModel) or has
        already been patched — wrapping the same model instance twice must
        not stack a second JSON-parsing layer on top of the first.
        """
        if not hasattr(self.wrapped_model, '_create_chat_result'):
            # Model doesn't have this method (e.g., MockChatModel), skip patching
            return
        original_create_chat_result = self.wrapped_model._create_chat_result
        if getattr(original_create_chat_result, '_tool_args_patched', False):
            # Already patched (model wrapped more than once) — keep idempotent.
            return

        @wraps(original_create_chat_result)
        def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
            """Parse string tool-call arguments into dicts, then delegate."""
            # Normalize the provider response to a plain dict so it can be
            # rewritten in place; ChatOpenAI accepts a dict response too.
            response_dict = response if isinstance(response, dict) else response.model_dump()
            for choice in response_dict.get('choices') or []:
                # Providers may send message/tool_calls as null — treat as empty.
                message = choice.get('message') or {}
                for tool_call in message.get('tool_calls') or []:
                    function = tool_call.get('function') or {}
                    args = function.get('arguments')
                    if isinstance(args, str):
                        try:
                            function['arguments'] = json.loads(args)
                        except json.JSONDecodeError:
                            # Keep as string if parsing fails — better to let the
                            # downstream validator report it than to crash here.
                            pass
            # Call original method with fixed response
            return original_create_chat_result(response_dict, generation_info)

        # Marker lets a second wrapper detect and skip re-patching.
        patched_create_chat_result._tool_args_patched = True
        self.wrapped_model._create_chat_result = patched_create_chat_result

    @property
    def _llm_type(self) -> str:
        """Return identifier for this LLM type."""
        if hasattr(self.wrapped_model, '_llm_type'):
            return f"wrapped-{self.wrapped_model._llm_type}"
        return "wrapped-chat-model"

    def __getattr__(self, name: str):
        """Proxy all other attributes/methods to the wrapped model.

        Fetches ``wrapped_model`` via ``object.__getattribute__`` to avoid
        infinite recursion when the attribute is missing (e.g. during
        unpickling/copying, before __init__ has run).
        """
        try:
            wrapped = object.__getattribute__(self, 'wrapped_model')
        except AttributeError:
            raise AttributeError(name) from None
        return getattr(wrapped, name)

    def bind_tools(self, tools: Any, **kwargs):
        """
        Bind tools to the wrapped model.

        Since we patch the model in-place, we can just delegate to the wrapped model.
        """
        return self.wrapped_model.bind_tools(tools, **kwargs)

    def bind(self, **kwargs):
        """
        Bind settings to the wrapped model.

        Since we patch the model in-place, we can just delegate to the wrapped model.
        """
        return self.wrapped_model.bind(**kwargs)