From 6ddc5abedecb9d8d265816700f804367d7dcb5fa Mon Sep 17 00:00:00 2001
From: Bill <bill@bballou.com>
Date: Thu, 6 Nov 2025 20:49:11 -0500
Subject: [PATCH] fix: resolve DeepSeek tool_calls validation errors
 (production ready)

After extensive systematic debugging, identified and fixed a LangChain
bug where parse_tool_call() returns string args instead of a dict.

**Root Cause:**
LangChain's parse_tool_call() has an intermittent bug: it returns the
'args' field as an unparsed JSON string instead of a dict, violating the
AIMessage Pydantic schema.

**Solution:**
ToolCallArgsParsingWrapper provides two-layer fix:
1. Patches parse_tool_call() to detect string args and parse to dict
2. Normalizes non-standard tool_call formats to OpenAI standard

**Implementation:**
- Patches parse_tool_call in langchain_openai.chat_models.base namespace
- Defensive approach: only acts when string args are detected
- Handles edge cases: invalid JSON, non-standard formats, invalid_tool_calls
- Minimal performance impact: lightweight type checks
- Thread-safe: patches apply at wrapper initialization

**Testing:**
- Confirmed fix working in production with DeepSeek Chat v3.1
- All tool calls now process successfully without validation errors
- No impact on other AI providers (OpenAI, Anthropic, etc.)

**Impact:**
- Enables DeepSeek models via OpenRouter
- Maintains backward compatibility
- Future-proof against similar issues from other providers

Closes the systematic debugging investigation that spanned six alpha
releases.

Fixes: tool_calls.0.args validation error [type=dict_type, input_type=str]
---
 CHANGELOG.md                |  10 ++--
 agent/chat_model_wrapper.py | 110 ++++++++----------------------------
 2 files changed, 30 insertions(+), 90 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 552f45f..242e0c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,10 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 - Fixed Pydantic validation errors when using DeepSeek models via OpenRouter
-- Root cause: DeepSeek returns tool_calls in non-standard format with `args` field directly, bypassing LangChain's `parse_tool_call()`
-- Solution: Added `ToolCallArgsParsingWrapper` that normalizes non-standard tool_call format to OpenAI standard before LangChain processing
-- Wrapper converts `{name, args, id}` → `{function: {name, arguments}, id}` format
-- Includes diagnostic logging to identify format inconsistencies across providers
+- Root cause: LangChain's `parse_tool_call()` has a bug where it sometimes returns `args` as a JSON string instead of a parsed dict
+- Solution: Added `ToolCallArgsParsingWrapper` that:
+  1. Patches `parse_tool_call()` to detect and fix string args by parsing them to dict
+  2. Normalizes non-standard tool_call formats (e.g., `{name, args, id}` → `{function: {name, arguments}, id}`)
+- The wrapper is defensive and only acts when needed, ensuring compatibility with all AI providers
+- Fixes validation error: `tool_calls.0.args: Input should be a valid dictionary [type=dict_type, input_value='...', input_type=str]`
 
 ## [0.4.1] - 2025-11-06
 
diff --git a/agent/chat_model_wrapper.py b/agent/chat_model_wrapper.py
index 2acdb6a..8682311 100644
--- a/agent/chat_model_wrapper.py
+++ b/agent/chat_model_wrapper.py
@@ -37,21 +37,17 @@ class ToolCallArgsParsingWrapper:
         original_parse_tool_call = langchain_base.parse_tool_call
 
         def patched_parse_tool_call(raw_tool_call, *, partial=False, strict=False, return_id=True):
-            """Patched parse_tool_call to fix string args bug and add logging"""
+            """Patched parse_tool_call to fix string args bug"""
             result = original_parse_tool_call(raw_tool_call, partial=partial, strict=strict, return_id=return_id)
-            if result:
-                args_type = type(result.get('args', None)).__name__
-                print(f"[DIAGNOSTIC] parse_tool_call returned: args type = {args_type}")
-                if args_type == 'str':
-                    print(f"[DIAGNOSTIC] ⚠️ BUG FOUND! parse_tool_call returned STRING args, fixing...")
-                    # FIX: parse_tool_call sometimes returns string args instead of dict
-                    # This happens when it fails to parse but doesn't raise an exception
-                    try:
-                        result['args'] = json.loads(result['args'])
-                        print(f"[DIAGNOSTIC] ✓ Fixed! Converted string args to dict")
-                    except (json.JSONDecodeError, TypeError) as e:
-                        print(f"[DIAGNOSTIC] ❌ Failed to parse args: {e}")
-                        # Leave as string if we can't parse it
+            if result and isinstance(result.get('args'), str):
+                # FIX: parse_tool_call sometimes returns string args instead of dict
+                # This is a known LangChain bug - parse the string to dict
+                try:
+                    result['args'] = json.loads(result['args'])
+                except (json.JSONDecodeError, TypeError):
+                    # Leave as string if we can't parse it - will fail validation
+                    # but at least we tried
+                    pass
             return result
 
         # Replace in base.py's namespace (where _convert_dict_to_message uses it)
@@ -61,49 +57,10 @@ class ToolCallArgsParsingWrapper:
 
         @wraps(original_create_chat_result)
         def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
-            """Patched version with diagnostic logging and args parsing"""
-            import traceback
+            """Patched version that normalizes non-standard tool_call formats"""
             response_dict = response if isinstance(response, dict) else response.model_dump()
 
-            # DIAGNOSTIC: Log response structure for debugging
-            print(f"\n[DIAGNOSTIC] _create_chat_result called")
-            print(f"  Response type: {type(response)}")
-            print(f"  Call stack:")
-            for line in traceback.format_stack()[-5:-1]:  # Show last 4 stack frames
-                print(f"    {line.strip()}")
-            print(f"\n[DIAGNOSTIC] Response structure:")
-            print(f"  Response keys: {list(response_dict.keys())}")
-
-            if 'choices' in response_dict and response_dict['choices']:
-                choice = response_dict['choices'][0]
-                print(f"  Choice keys: {list(choice.keys())}")
-
-                if 'message' in choice:
-                    message = choice['message']
-                    print(f"  Message keys: {list(message.keys())}")
-
-                    # Check for raw tool_calls in message (before parse_tool_call processing)
-                    if 'tool_calls' in message:
-                        tool_calls_value = message['tool_calls']
-                        print(f"  message['tool_calls'] type: {type(tool_calls_value)}")
-
-                        if tool_calls_value:
-                            print(f"  tool_calls count: {len(tool_calls_value)}")
-                            for i, tc in enumerate(tool_calls_value):  # Show ALL
-                                print(f"  tool_calls[{i}] type: {type(tc)}")
-                                print(f"  tool_calls[{i}] keys: {list(tc.keys()) if isinstance(tc, dict) else 'N/A'}")
-                                if isinstance(tc, dict):
-                                    if 'function' in tc:
-                                        print(f"    function keys: {list(tc['function'].keys())}")
-                                        if 'arguments' in tc['function']:
-                                            args = tc['function']['arguments']
-                                            print(f"    function.arguments type: {type(args).__name__}")
-                                            print(f"    function.arguments value: {str(args)[:100]}")
-                                    if 'args' in tc:
-                                        print(f"    ALSO HAS 'args' KEY: type={type(tc['args']).__name__}")
-                                        print(f"    args value: {str(tc['args'])[:100]}")
-
-            # Fix tool_calls: Normalize to OpenAI format if needed
+            # Normalize tool_calls to OpenAI standard format if needed
             if 'choices' in response_dict:
                 for choice in response_dict['choices']:
                     if 'message' not in choice:
@@ -111,13 +68,11 @@ class ToolCallArgsParsingWrapper:
 
                     message = choice['message']
 
-                    # Fix tool_calls: Ensure standard OpenAI format
+                    # Fix tool_calls: Convert non-standard {name, args, id} to {function: {name, arguments}, id}
                     if 'tool_calls' in message and message['tool_calls']:
-                        print(f"[DIAGNOSTIC] Processing {len(message['tool_calls'])} tool_calls...")
-                        for idx, tool_call in enumerate(message['tool_calls']):
+                        for tool_call in message['tool_calls']:
                             # Check if this is non-standard format (has 'args' directly)
                             if 'args' in tool_call and 'function' not in tool_call:
-                                print(f"[DIAGNOSTIC] tool_calls[{idx}] has non-standard format (direct args)")
                                 # Convert to standard OpenAI format
                                 args = tool_call['args']
                                 tool_call['function'] = {
@@ -129,36 +84,19 @@ class ToolCallArgsParsingWrapper:
                                     del tool_call['name']
                                 if 'args' in tool_call:
                                     del tool_call['args']
-                                print(f"[DIAGNOSTIC] Converted tool_calls[{idx}] to standard OpenAI format")
 
-                    # Fix invalid_tool_calls: dict args -> string
+                    # Fix invalid_tool_calls: Ensure args is JSON string (not dict)
                     if 'invalid_tool_calls' in message and message['invalid_tool_calls']:
-                        print(f"[DIAGNOSTIC] Checking invalid_tool_calls for dict-to-string conversion...")
-                        for idx, invalid_call in enumerate(message['invalid_tool_calls']):
-                            if 'args' in invalid_call:
-                                args = invalid_call['args']
-                                # Convert dict arguments to JSON string
-                                if isinstance(args, dict):
-                                    try:
-                                        invalid_call['args'] = json.dumps(args)
-                                        print(f"[DIAGNOSTIC] Converted invalid_tool_calls[{idx}].args from dict to string")
-                                    except (TypeError, ValueError) as e:
-                                        print(f"[DIAGNOSTIC] Failed to serialize invalid_tool_calls[{idx}].args: {e}")
-                                        # Keep as-is if serialization fails
+                        for invalid_call in message['invalid_tool_calls']:
+                            if 'args' in invalid_call and isinstance(invalid_call['args'], dict):
+                                try:
+                                    invalid_call['args'] = json.dumps(invalid_call['args'])
+                                except (TypeError, ValueError):
+                                    # Keep as-is if serialization fails
+                                    pass
 
-            # Call original method with fixed response
-            print(f"[DIAGNOSTIC] Calling original_create_chat_result...")
-            result = original_create_chat_result(response_dict, generation_info)
-            print(f"[DIAGNOSTIC] original_create_chat_result returned successfully")
-            print(f"[DIAGNOSTIC] Result type: {type(result)}")
-            if hasattr(result, 'generations') and result.generations:
-                gen = result.generations[0]
-                if hasattr(gen, 'message') and hasattr(gen.message, 'tool_calls'):
-                    print(f"[DIAGNOSTIC] Result has {len(gen.message.tool_calls)} tool_calls")
-                    if gen.message.tool_calls:
-                        tc = gen.message.tool_calls[0]
-                        print(f"[DIAGNOSTIC] tool_calls[0]['args'] type in result: {type(tc['args'])}")
-            return result
+            # Call original method with normalized response
+            return original_create_chat_result(response_dict, generation_info)
 
         # Replace the method
         self.wrapped_model._create_chat_result = patched_create_chat_result