Compare commits

..

4 Commits

Author SHA1 Message Date
d199b093c1 debug: patch parse_tool_call to identify source of string args
Added global monkey-patch of langchain_core's parse_tool_call to log
the type of 'args' it returns. This will definitively show whether:
1. parse_tool_call is returning string args (bug in langchain_core)
2. Something else is modifying the result after parse_tool_call returns
3. AIMessage construction is getting tool_calls from a different source

This is the critical diagnostic to find the root cause.
2025-11-06 17:42:33 -05:00
483621f9b7 debug: add comprehensive diagnostics to trace error location
Adding detailed logging to:
1. Show call stack when _create_chat_result is called
2. Verify our wrapper is being executed
3. Check result after _convert_dict_to_message processes tool_calls
4. Identify exact point where string args become the problem

This will help determine if error occurs during response processing
or if there's a separate code path bypassing our wrapper.
2025-11-06 12:10:29 -05:00
e8939be04e debug: enhance diagnostic logging to detect args field in tool_calls
Added more detailed logging to identify if DeepSeek responses include
both 'function.arguments' and 'args' fields, or if tool_calls are
objects vs dicts, to understand why parse_tool_call isn't converting
string args to dict as expected.
2025-11-06 12:00:08 -05:00
2e0cf4d507 docs: add v0.5.0 roadmap for performance metrics and status APIs
Added new pre-v1.0 release (v0.5.0) with two new API endpoints:

1. Performance Metrics API (GET /metrics/performance)
   - Query model performance over custom date ranges
   - Returns total return, trade count, win rate, daily P&L stats
   - Enables model comparison and strategy evaluation

2. Status & Coverage Endpoint (GET /status)
   - Comprehensive system status in single endpoint
   - Price data coverage (symbols, date ranges, gaps)
   - Model simulation progress (date ranges, completion %)
   - System health (database, MCP services, disk usage)

Updated version history:
- Added v0.4.0 (current release)
- Added v0.5.0 (planned)
- Renamed v1.3.0 to "Advanced performance metrics"

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-06 11:41:21 -05:00
2 changed files with 60 additions and 14 deletions

View File

@@ -679,11 +679,13 @@ To propose a new feature:
- **v0.1.0** - Initial release with batch execution
- **v0.2.0** - Docker deployment support
- **v0.3.0** - REST API, on-demand downloads, database storage (current)
- **v0.3.0** - REST API, on-demand downloads, database storage
- **v0.4.0** - Daily P&L calculation, day-centric results API, reasoning summaries (current)
- **v0.5.0** - Performance metrics & status APIs (planned)
- **v1.0.0** - Production stability & validation (planned)
- **v1.1.0** - API authentication & security (planned)
- **v1.2.0** - Position history & analytics (planned)
- **v1.3.0** - Performance metrics & analytics (planned)
- **v1.3.0** - Advanced performance metrics & analytics (planned)
- **v1.4.0** - Data management API (planned)
- **v1.5.0** - Web dashboard UI (planned)
- **v1.6.0** - Advanced configuration & customization (planned)
@@ -691,4 +693,4 @@ To propose a new feature:
---
Last updated: 2025-11-01
Last updated: 2025-11-06

View File

@@ -32,14 +32,37 @@ class ToolCallArgsParsingWrapper:
# Model doesn't have this method (e.g., MockChatModel), skip patching
return
# CRITICAL: Also patch parse_tool_call to see what it's returning
from langchain_core.output_parsers import openai_tools
original_parse_tool_call = openai_tools.parse_tool_call
def patched_parse_tool_call(raw_tool_call, *, partial=False, strict=False, return_id=True):
"""Patched parse_tool_call to log what it returns"""
result = original_parse_tool_call(raw_tool_call, partial=partial, strict=strict, return_id=return_id)
if result:
args_type = type(result.get('args', None)).__name__
print(f"[DIAGNOSTIC] parse_tool_call returned: args type = {args_type}")
if args_type == 'str':
print(f"[DIAGNOSTIC] ⚠️ BUG FOUND! parse_tool_call returned STRING args: {result['args']}")
return result
# Replace globally
openai_tools.parse_tool_call = patched_parse_tool_call
original_create_chat_result = self.wrapped_model._create_chat_result
@wraps(original_create_chat_result)
def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
"""Patched version with diagnostic logging and args parsing"""
import traceback
response_dict = response if isinstance(response, dict) else response.model_dump()
# DIAGNOSTIC: Log response structure for debugging
print(f"\n[DIAGNOSTIC] _create_chat_result called")
print(f" Response type: {type(response)}")
print(f" Call stack:")
for line in traceback.format_stack()[-5:-1]: # Show last 4 stack frames
print(f" {line.strip()}")
print(f"\n[DIAGNOSTIC] Response structure:")
print(f" Response keys: {list(response_dict.keys())}")
@@ -51,16 +74,26 @@ class ToolCallArgsParsingWrapper:
message = choice['message']
print(f" Message keys: {list(message.keys())}")
if 'tool_calls' in message and message['tool_calls']:
print(f" tool_calls count: {len(message['tool_calls'])}")
for i, tc in enumerate(message['tool_calls'][:2]): # Show first 2
print(f" tool_calls[{i}] keys: {list(tc.keys())}")
if 'function' in tc:
print(f" function keys: {list(tc['function'].keys())}")
if 'arguments' in tc['function']:
args = tc['function']['arguments']
print(f" arguments type: {type(args).__name__}")
print(f" arguments value (first 100 chars): {str(args)[:100]}")
# Check for raw tool_calls in message (before parse_tool_call processing)
if 'tool_calls' in message:
tool_calls_value = message['tool_calls']
print(f" message['tool_calls'] type: {type(tool_calls_value)}")
if tool_calls_value:
print(f" tool_calls count: {len(tool_calls_value)}")
for i, tc in enumerate(tool_calls_value): # Show ALL
print(f" tool_calls[{i}] type: {type(tc)}")
print(f" tool_calls[{i}] keys: {list(tc.keys()) if isinstance(tc, dict) else 'N/A'}")
if isinstance(tc, dict):
if 'function' in tc:
print(f" function keys: {list(tc['function'].keys())}")
if 'arguments' in tc['function']:
args = tc['function']['arguments']
print(f" function.arguments type: {type(args).__name__}")
print(f" function.arguments value: {str(args)[:100]}")
if 'args' in tc:
print(f" ALSO HAS 'args' KEY: type={type(tc['args']).__name__}")
print(f" args value: {str(tc['args'])[:100]}")
# Fix tool_calls: Normalize to OpenAI format if needed
if 'choices' in response_dict:
@@ -106,7 +139,18 @@ class ToolCallArgsParsingWrapper:
# Keep as-is if serialization fails
# Call original method with fixed response
return original_create_chat_result(response_dict, generation_info)
print(f"[DIAGNOSTIC] Calling original_create_chat_result...")
result = original_create_chat_result(response_dict, generation_info)
print(f"[DIAGNOSTIC] original_create_chat_result returned successfully")
print(f"[DIAGNOSTIC] Result type: {type(result)}")
if hasattr(result, 'generations') and result.generations:
gen = result.generations[0]
if hasattr(gen, 'message') and hasattr(gen.message, 'tool_calls'):
print(f"[DIAGNOSTIC] Result has {len(gen.message.tool_calls)} tool_calls")
if gen.message.tool_calls:
tc = gen.message.tool_calls[0]
print(f"[DIAGNOSTIC] tool_calls[0]['args'] type in result: {type(tc['args'])}")
return result
# Replace the method
self.wrapped_model._create_chat_result = patched_create_chat_result