diff --git a/CHANGELOG.md b/CHANGELOG.md index 19f129a..552f45f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Fixed Pydantic validation errors when using DeepSeek models via OpenRouter +- Root cause: DeepSeek returns tool_calls in non-standard format with `args` field directly, bypassing LangChain's `parse_tool_call()` +- Solution: Added `ToolCallArgsParsingWrapper` that normalizes non-standard tool_call format to OpenAI standard before LangChain processing +- Wrapper converts `{name, args, id}` → `{function: {name, arguments}, id}` format +- Includes diagnostic logging to identify format inconsistencies across providers + ## [0.4.1] - 2025-11-06 ### Fixed diff --git a/ROADMAP.md b/ROADMAP.md index e24d02f..327c7e4 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -4,6 +4,78 @@ This document outlines planned features and improvements for the AI-Trader proje ## Release Planning +### v0.5.0 - Performance Metrics & Status APIs (Planned) + +**Focus:** Enhanced observability and performance tracking + +#### Performance Metrics API +- **Performance Summary Endpoint** - Query model performance over date ranges + - `GET /metrics/performance` - Aggregated performance metrics + - Query parameters: `model`, `start_date`, `end_date` + - Returns comprehensive performance summary: + - Total return (dollar amount and percentage) + - Number of trades executed (buy + sell) + - Win rate (profitable trading days / total trading days) + - Average daily P&L (profit and loss) + - Best/worst trading day (highest/lowest daily P&L) + - Final portfolio value (cash + holdings at market value) + - Number of trading days in queried range + - Starting vs. 
ending portfolio comparison + - Use cases: + - Compare model performance across different time periods + - Evaluate strategy effectiveness + - Identify top-performing models + - Example: `GET /metrics/performance?model=gpt-4&start_date=2025-01-01&end_date=2025-01-31` + - Filtering options: + - Single model or all models + - Custom date ranges + - Exclude incomplete trading days + - Response format: JSON with clear metric definitions + +#### Status & Coverage Endpoint +- **System Status Summary** - Data availability and simulation progress + - `GET /status` - Comprehensive system status + - Price data coverage section: + - Available symbols (NASDAQ 100 constituents) + - Date range of downloaded price data per symbol + - Total trading days with complete data + - Missing data gaps (symbols without data, date gaps) + - Last data refresh timestamp + - Model simulation status section: + - List of all configured models (enabled/disabled) + - Date ranges simulated per model (first and last trading day) + - Total trading days completed per model + - Most recent simulation date per model + - Completion percentage (simulated days / available data days) + - System health section: + - Database connectivity status + - MCP services status (Math, Search, Trade, LocalPrices) + - API version and deployment mode + - Disk space usage (database size, log size) + - Use cases: + - Verify data availability before triggering simulations + - Identify which models need updates to latest data + - Monitor system health and readiness + - Plan data downloads for missing date ranges + - Example: `GET /status` (no parameters required) + - Benefits: + - Single endpoint for complete system overview + - No need to query multiple endpoints for status + - Clear visibility into data gaps + - Track simulation progress across models + +#### Implementation Details +- Database queries for efficient metric calculation +- Caching for frequently accessed metrics (optional) +- Response time target: <500ms for 
typical queries +- Comprehensive error handling for missing data + +#### Benefits +- **Better Observability** - Clear view of system state and model performance +- **Data-Driven Decisions** - Quantitative metrics for model comparison +- **Proactive Monitoring** - Identify data gaps before simulations fail +- **User Experience** - Single endpoint to check "what's available and what's been done" + ### v1.0.0 - Production Stability & Validation (Planned) **Focus:** Comprehensive testing, documentation, and production readiness diff --git a/agent/base_agent/base_agent.py b/agent/base_agent/base_agent.py index 7314d11..56af657 100644 --- a/agent/base_agent/base_agent.py +++ b/agent/base_agent/base_agent.py @@ -33,6 +33,7 @@ from tools.deployment_config import ( from agent.context_injector import ContextInjector from agent.pnl_calculator import DailyPnLCalculator from agent.reasoning_summarizer import ReasoningSummarizer +from agent.chat_model_wrapper import ToolCallArgsParsingWrapper # Load environment variables load_dotenv() @@ -211,14 +212,16 @@ class BaseAgent: self.model = MockChatModel(date="2025-01-01") # Date will be updated per session print(f"🤖 Using MockChatModel (DEV mode)") else: - self.model = ChatOpenAI( + base_model = ChatOpenAI( model=self.basemodel, base_url=self.openai_base_url, api_key=self.openai_api_key, max_retries=3, timeout=30 ) - print(f"🤖 Using {self.basemodel} (PROD mode)") + # Wrap model with diagnostic wrapper + self.model = ToolCallArgsParsingWrapper(model=base_model) + print(f"🤖 Using {self.basemodel} (PROD mode) with diagnostic wrapper") except Exception as e: raise RuntimeError(f"❌ Failed to initialize AI model: {e}") diff --git a/agent/chat_model_wrapper.py b/agent/chat_model_wrapper.py index 1c5c986..17f12e5 100644 --- a/agent/chat_model_wrapper.py +++ b/agent/chat_model_wrapper.py @@ -1,24 +1,18 @@ """ -Chat model wrapper - Passthrough wrapper for ChatOpenAI models. +Chat model wrapper to fix tool_calls args parsing issues. 
-Originally created to fix DeepSeek tool_calls arg parsing issues, but investigation
-revealed DeepSeek already returns the correct format (arguments as JSON strings).
-
-This wrapper is now a simple passthrough that proxies all calls to the underlying model.
-Kept for backward compatibility and potential future use.
+DeepSeek (via OpenRouter) may return tool_calls in a non-standard `{name, args, id}` shape;
+this wrapper normalizes them to OpenAI's `{function: {name, arguments}}` format (dict args are serialized to JSON strings).
 """
-from typing import Any
+import json
+from typing import Any, Optional, Dict
+from functools import wraps
 
 
 class ToolCallArgsParsingWrapper:
     """
-    Passthrough wrapper around ChatOpenAI models.
-
-    After systematic debugging, determined that DeepSeek returns tool_calls.arguments
-    as JSON strings (correct format), so no parsing/conversion is needed.
-
-    This wrapper simply proxies all calls to the wrapped model.
+    Wrapper that adds diagnostic logging and fixes tool_calls args if needed.
     """
 
     def __init__(self, model: Any, **kwargs):
@@ -30,6 +24,92 @@ class ToolCallArgsParsingWrapper:
             **kwargs: Additional parameters (ignored, for compatibility)
         """
         self.wrapped_model = model
+        self._patch_model()
+
+    def _patch_model(self):
+        """Monkey-patch the model's _create_chat_result to add diagnostics"""
+        if not hasattr(self.wrapped_model, '_create_chat_result'):
+            # Model doesn't have this method (e.g., MockChatModel), skip patching
+            return
+
+        original_create_chat_result = self.wrapped_model._create_chat_result
+
+        @wraps(original_create_chat_result)
+        def patched_create_chat_result(response: Any, generation_info: Optional[Dict] = None):
+            """Patched version with diagnostic logging and args parsing"""
+            response_dict = response if isinstance(response, dict) else response.model_dump()
+
+            # DIAGNOSTIC: Log response structure for debugging
+            print(f"\n[DIAGNOSTIC] Response structure:")
+            print(f"  Response keys: {list(response_dict.keys())}")
+
+            if 'choices' in response_dict and response_dict['choices']:
+                choice = 
response_dict['choices'][0] + print(f" Choice keys: {list(choice.keys())}") + + if 'message' in choice: + message = choice['message'] + print(f" Message keys: {list(message.keys())}") + + if 'tool_calls' in message and message['tool_calls']: + print(f" tool_calls count: {len(message['tool_calls'])}") + for i, tc in enumerate(message['tool_calls'][:2]): # Show first 2 + print(f" tool_calls[{i}] keys: {list(tc.keys())}") + if 'function' in tc: + print(f" function keys: {list(tc['function'].keys())}") + if 'arguments' in tc['function']: + args = tc['function']['arguments'] + print(f" arguments type: {type(args).__name__}") + print(f" arguments value (first 100 chars): {str(args)[:100]}") + + # Fix tool_calls: Normalize to OpenAI format if needed + if 'choices' in response_dict: + for choice in response_dict['choices']: + if 'message' not in choice: + continue + + message = choice['message'] + + # Fix tool_calls: Ensure standard OpenAI format + if 'tool_calls' in message and message['tool_calls']: + print(f"[DIAGNOSTIC] Processing {len(message['tool_calls'])} tool_calls...") + for idx, tool_call in enumerate(message['tool_calls']): + # Check if this is non-standard format (has 'args' directly) + if 'args' in tool_call and 'function' not in tool_call: + print(f"[DIAGNOSTIC] tool_calls[{idx}] has non-standard format (direct args)") + # Convert to standard OpenAI format + args = tool_call['args'] + tool_call['function'] = { + 'name': tool_call.get('name', ''), + 'arguments': args if isinstance(args, str) else json.dumps(args) + } + # Remove non-standard fields + if 'name' in tool_call: + del tool_call['name'] + if 'args' in tool_call: + del tool_call['args'] + print(f"[DIAGNOSTIC] Converted tool_calls[{idx}] to standard OpenAI format") + + # Fix invalid_tool_calls: dict args -> string + if 'invalid_tool_calls' in message and message['invalid_tool_calls']: + print(f"[DIAGNOSTIC] Checking invalid_tool_calls for dict-to-string conversion...") + for idx, invalid_call in 
enumerate(message['invalid_tool_calls']): + if 'args' in invalid_call: + args = invalid_call['args'] + # Convert dict arguments to JSON string + if isinstance(args, dict): + try: + invalid_call['args'] = json.dumps(args) + print(f"[DIAGNOSTIC] Converted invalid_tool_calls[{idx}].args from dict to string") + except (TypeError, ValueError) as e: + print(f"[DIAGNOSTIC] Failed to serialize invalid_tool_calls[{idx}].args: {e}") + # Keep as-is if serialization fails + + # Call original method with fixed response + return original_create_chat_result(response_dict, generation_info) + + # Replace the method + self.wrapped_model._create_chat_result = patched_create_chat_result @property def _llm_type(self) -> str: