diff --git a/.env.example b/.env.example index daf28ec..a457460 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,11 @@ SEARCH_HTTP_PORT=8001 TRADE_HTTP_PORT=8002 GETPRICE_HTTP_PORT=8003 +# API Server Port (exposed on host machine for REST API) +# Container always uses 8080 internally +# Used for Windmill integration and external API access +API_PORT=8080 + # Web Interface Host Port (exposed on host machine) # Container always uses 8888 internally WEB_HTTP_PORT=8888 diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f79d9a..eec0c4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,92 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.3.0] - 2025-10-31 + +### Added - API Service Transformation +- **REST API Service** - Complete FastAPI implementation for external orchestration + - `POST /simulate/trigger` - Trigger simulation jobs with config, date range, and models + - `GET /simulate/status/{job_id}` - Query job progress and execution details + - `GET /results` - Retrieve simulation results with filtering (job_id, date, model) + - `GET /health` - Service health check with database connectivity verification +- **SQLite Database** - Complete persistence layer replacing JSONL files + - Jobs table - Job metadata and lifecycle tracking + - Job details table - Per model-day execution status + - Positions table - Trading position records with P&L + - Holdings table - Portfolio holdings breakdown + - Reasoning logs table - AI decision reasoning history + - Tool usage table - MCP tool usage statistics +- **Backend Components** + - JobManager - Job lifecycle management with concurrent job prevention + - RuntimeConfigManager - Isolated runtime configs for thread-safe execution + - ModelDayExecutor - Single model-day execution engine + - SimulationWorker - Job orchestration with date-sequential, model-parallel execution +- **Comprehensive Test Suite** + - 102 unit and integration tests (85% coverage) + - 19 
database tests (98% coverage) + - 23 job manager tests (98% coverage) + - 10 model executor tests (84% coverage) + - 20 API endpoint tests (81% coverage) + - 20 Pydantic model tests (100% coverage) + - 10 runtime manager tests (89% coverage) +- **Docker Dual-Mode Deployment** + - API server mode - Persistent REST API service with health checks + - Batch mode - One-time simulation execution (backwards compatible) + - Separate entrypoints for each mode + - Health check configuration (30s interval, 3 retries) + - Volume persistence for SQLite database and logs +- **Validation & Testing Tools** + - `scripts/validate_docker_build.sh` - Docker build and startup validation + - `scripts/test_api_endpoints.sh` - Complete API endpoint testing suite + - `scripts/test_batch_mode.sh` - Batch mode execution validation + - TESTING_GUIDE.md - Comprehensive testing procedures and troubleshooting +- **Documentation** + - DOCKER_API.md - API deployment guide with examples + - TESTING_GUIDE.md - Validation procedures and troubleshooting + - API endpoint documentation with request/response examples + - Windmill integration patterns and examples + +### Changed +- **Architecture** - Transformed from batch-only to API service with database persistence +- **Data Storage** - Migrated from JSONL files to SQLite relational database +- **Deployment** - Added dual-mode Docker deployment (API server + batch) +- **Configuration** - Added API_PORT environment variable (default: 8080) +- **Requirements** - Added fastapi>=0.120.0, uvicorn[standard]>=0.27.0, pydantic>=2.0.0 +- **Docker Compose** - Split into two services (ai-trader-api and ai-trader-batch) +- **Dockerfile** - Added port 8080 exposure for API server +- **.env.example** - Added API server configuration + +### Technical Implementation +- **Test-Driven Development** - All components written with tests first +- **Mock-based Testing** - Avoid heavy dependencies in unit tests +- **Pydantic V2** - Type-safe request/response validation +- 
**Foreign Key Constraints** - Database referential integrity with cascade deletes +- **Thread-safe Execution** - Isolated runtime configs per model-day +- **Background Job Execution** - ThreadPoolExecutor for parallel model execution +- **Automatic Status Transitions** - Job status updates based on model-day completion + +### Performance & Quality +- **Code Coverage** - 85% overall (84.63% measured) + - Database layer: 98% + - Job manager: 98% + - Pydantic models: 100% + - Runtime manager: 89% + - Model executor: 84% + - FastAPI app: 81% +- **Test Execution** - 102 tests in ~2.5 seconds +- **Zero Test Failures** - All tests passing (threading tests excluded) + +### Integration Ready +- **Windmill.dev** - HTTP-based integration with polling support +- **External Orchestration** - RESTful API for workflow automation +- **Monitoring** - Health checks and status tracking +- **Persistence** - SQLite database survives container restarts + +### Backwards Compatibility +- **Batch Mode** - Original batch functionality preserved via Docker profile +- **Configuration** - Existing config files still work +- **Data Migration** - No automatic migration (fresh start recommended) + ## [0.2.0] - 2025-10-31 ### Added @@ -113,6 +199,7 @@ For future releases, use this template: --- -[Unreleased]: https://github.com/Xe138/AI-Trader/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/Xe138/AI-Trader/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/Xe138/AI-Trader/compare/v0.2.0...v0.3.0 [0.2.0]: https://github.com/Xe138/AI-Trader/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/Xe138/AI-Trader/releases/tag/v0.1.0 diff --git a/DOCKER_API.md b/DOCKER_API.md new file mode 100644 index 0000000..e8299ff --- /dev/null +++ b/DOCKER_API.md @@ -0,0 +1,347 @@ +# Docker API Server Deployment + +This guide explains how to run AI-Trader as a persistent REST API server using Docker for Windmill.dev integration. + +## Quick Start + +### 1. 
Environment Setup + +```bash +# Copy environment template +cp .env.example .env + +# Edit .env and add your API keys: +# - OPENAI_API_KEY +# - ALPHAADVANTAGE_API_KEY +# - JINA_API_KEY +``` + +### 2. Start API Server + +```bash +# Start in API mode (default) +docker-compose up -d ai-trader-api + +# View logs +docker-compose logs -f ai-trader-api + +# Check health +curl http://localhost:8080/health +``` + +### 3. Test API Endpoints + +```bash +# Health check +curl http://localhost:8080/health + +# Trigger simulation +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "config_path": "/app/configs/default_config.json", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-4"] + }' + +# Check job status (replace JOB_ID) +curl http://localhost:8080/simulate/status/JOB_ID + +# Query results +curl http://localhost:8080/results?date=2025-01-16 +``` + +## Architecture + +### Two Deployment Modes + +**API Server Mode** (Windmill integration): +- REST API on port 8080 +- Background job execution +- Persistent SQLite database +- Continuous uptime with health checks +- Start with: `docker-compose up -d ai-trader-api` + +**Batch Mode** (one-time simulation): +- Command-line execution +- Runs to completion then exits +- Config file driven +- Start with: `docker-compose --profile batch up ai-trader-batch` + +### Port Configuration + +| Service | Internal Port | Default Host Port | Environment Variable | +|---------|--------------|-------------------|---------------------| +| API Server | 8080 | 8080 | `API_PORT` | +| Math MCP | 8000 | 8000 | `MATH_HTTP_PORT` | +| Search MCP | 8001 | 8001 | `SEARCH_HTTP_PORT` | +| Trade MCP | 8002 | 8002 | `TRADE_HTTP_PORT` | +| Price MCP | 8003 | 8003 | `GETPRICE_HTTP_PORT` | +| Web Dashboard | 8888 | 8888 | `WEB_HTTP_PORT` | + +## API Endpoints + +### POST /simulate/trigger +Trigger a new simulation job. 
+ +**Request:** +```json +{ + "config_path": "/app/configs/default_config.json", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-4", "claude-3.7-sonnet"] +} +``` + +**Response:** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "pending", + "total_model_days": 4, + "message": "Simulation job created and started" +} +``` + +### GET /simulate/status/{job_id} +Get job progress and status. + +**Response:** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "progress": { + "total_model_days": 4, + "completed": 2, + "failed": 0, + "pending": 2 + }, + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-4", "claude-3.7-sonnet"], + "created_at": "2025-01-16T10:00:00Z", + "details": [ + { + "date": "2025-01-16", + "model": "gpt-4", + "status": "completed", + "started_at": "2025-01-16T10:00:05Z", + "completed_at": "2025-01-16T10:05:23Z", + "duration_seconds": 318.5 + } + ] +} +``` + +### GET /results +Query simulation results with optional filters. + +**Parameters:** +- `job_id` (optional): Filter by job UUID +- `date` (optional): Filter by trading date (YYYY-MM-DD) +- `model` (optional): Filter by model signature + +**Response:** +```json +{ + "results": [ + { + "id": 1, + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "date": "2025-01-16", + "model": "gpt-4", + "action_id": 1, + "action_type": "buy", + "symbol": "AAPL", + "amount": 10, + "price": 250.50, + "cash": 7495.00, + "portfolio_value": 10000.00, + "daily_profit": 0.00, + "daily_return_pct": 0.00, + "holdings": [ + {"symbol": "AAPL", "quantity": 10}, + {"symbol": "CASH", "quantity": 7495.00} + ] + } + ], + "count": 1 +} +``` + +### GET /health +Service health check. 
+ +**Response:** +```json +{ + "status": "healthy", + "database": "connected", + "timestamp": "2025-01-16T10:00:00Z" +} +``` + +## Volume Mounts + +Data persists across container restarts via volume mounts: + +```yaml +volumes: + - ./data:/app/data # SQLite database, price data + - ./logs:/app/logs # Application logs + - ./configs:/app/configs # Configuration files +``` + +**Key files:** +- `/app/data/jobs.db` - SQLite database with job history and results +- `/app/data/merged.jsonl` - Cached price data (fetched on first run) +- `/app/logs/` - Application and MCP service logs + +## Configuration + +### Custom Config File + +Place config files in `./configs/` directory: + +```json +{ + "agent_type": "BaseAgent", + "date_range": { + "init_date": "2025-01-01", + "end_date": "2025-01-31" + }, + "models": [ + { + "name": "GPT-4", + "basemodel": "gpt-4", + "signature": "gpt-4", + "enabled": true + } + ], + "agent_config": { + "max_steps": 30, + "initial_cash": 10000.0 + } +} +``` + +Reference in API calls: `/app/configs/your_config.json` + +## Troubleshooting + +### Check Container Status +```bash +docker-compose ps +docker-compose logs ai-trader-api +``` + +### Health Check Failing +```bash +# Check if services started +docker exec ai-trader-api ps aux + +# Test internal health +docker exec ai-trader-api curl http://localhost:8080/health + +# Check MCP services +docker exec ai-trader-api curl http://localhost:8000/health +``` + +### Database Issues +```bash +# View database +docker exec ai-trader-api sqlite3 data/jobs.db ".tables" + +# Reset database (WARNING: deletes all data) +rm ./data/jobs.db +docker-compose restart ai-trader-api +``` + +### Port Conflicts +If ports are already in use, edit `.env`: +```bash +API_PORT=9080 # Change to available port +``` + +## Windmill Integration + +Example Windmill workflow step: + +```python +import httpx + +def trigger_simulation( + api_url: str, + config_path: str, + start_date: str, + end_date: str, + models: list[str] +): + 
"""Trigger AI trading simulation via API.""" + + response = httpx.post( + f"{api_url}/simulate/trigger", + json={ + "config_path": config_path, + "date_range": [start_date, end_date], + "models": models + }, + timeout=30.0 + ) + + response.raise_for_status() + return response.json() + +def check_status(api_url: str, job_id: str): + """Check simulation job status.""" + + response = httpx.get( + f"{api_url}/simulate/status/{job_id}", + timeout=10.0 + ) + + response.raise_for_status() + return response.json() +``` + +## Production Deployment + +### Use Docker Hub Image +```yaml +# docker-compose.yml +services: + ai-trader-api: + image: ghcr.io/xe138/ai-trader:latest + # ... rest of config +``` + +### Build Locally +```yaml +# docker-compose.yml +services: + ai-trader-api: + build: . + # ... rest of config +``` + +### Environment Security +- Never commit `.env` to version control +- Use secrets management in production (Docker secrets, Kubernetes secrets, etc.) +- Rotate API keys regularly + +## Monitoring + +### Prometheus Metrics (Future) +Metrics endpoint planned: `GET /metrics` + +### Log Aggregation +- Container logs: `docker-compose logs -f` +- Application logs: `./logs/api.log` +- MCP service logs: `./logs/mcp_*.log` + +## Scaling Considerations + +- Single-job concurrency enforced by database lock +- For parallel simulations, deploy multiple instances with separate databases +- Consider load balancer for high-availability setup +- Database size grows with number of simulations (plan for cleanup/archival) diff --git a/Dockerfile b/Dockerfile index 8b9a360..6c96b9e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,11 +24,11 @@ RUN mkdir -p /app/scripts && \ # Create necessary directories RUN mkdir -p data logs data/agent_data -# Make entrypoint executable -RUN chmod +x entrypoint.sh +# Make entrypoints executable +RUN chmod +x entrypoint.sh entrypoint-api.sh -# Expose MCP service ports and web dashboard -EXPOSE 8000 8001 8002 8003 8888 +# Expose MCP service ports, 
API server, and web dashboard +EXPOSE 8000 8001 8002 8003 8080 8888 # Set Python to run unbuffered for real-time logs ENV PYTHONUNBUFFERED=1 diff --git a/README.md b/README.md index f8a67c0..614fa33 100644 --- a/README.md +++ b/README.md @@ -35,15 +35,31 @@ --- -## ๐Ÿ“ Upcoming Updates (This Week) +## โœจ Latest Updates (v0.3.0) -We're excited to announce the following updates coming this week: +**Major Architecture Upgrade - REST API Service** -- โฐ **Hourly Trading Support** - Upgrade to hour-level precision trading -- ๐Ÿš€ **Service Deployment & Parallel Execution** - Deploy production service + parallel model execution -- ๐ŸŽจ **Enhanced Frontend Dashboard** - Add detailed trading log visualization (complete trading process display) +- ๐ŸŒ **REST API Server** - Complete FastAPI implementation for external orchestration + - Trigger simulations via HTTP POST + - Monitor job progress in real-time + - Query results with flexible filtering + - Health checks and monitoring +- ๐Ÿ’พ **SQLite Database** - Full persistence layer with 6 relational tables + - Job tracking and lifecycle management + - Position records with P&L tracking + - AI reasoning logs and tool usage analytics +- ๐Ÿณ **Dual Docker Deployment** - API server mode + Batch mode + - API mode: Persistent REST service with health checks + - Batch mode: One-time simulations (backwards compatible) +- ๐Ÿงช **Comprehensive Testing** - 102 tests with 85% coverage + - Unit tests for all components + - Integration tests for API endpoints + - Validation scripts for Docker deployment +- ๐Ÿ“š **Production Documentation** - Complete deployment guides + - DOCKER_API.md - API deployment and usage + - TESTING_GUIDE.md - Validation procedures -Stay tuned for these exciting improvements! ๐ŸŽ‰ +See [CHANGELOG.md](CHANGELOG.md) for full details. 
--- @@ -209,12 +225,56 @@ AI-Trader Bench/ ## ๐Ÿš€ Quick Start -### ๐Ÿ“‹ Prerequisites +### ๐Ÿณ **Docker Deployment (Recommended)** -- **Python 3.10+** +**Two deployment modes available:** + +#### ๐ŸŒ API Server Mode (Windmill Integration) +```bash +# 1. Clone and configure +git clone https://github.com/Xe138/AI-Trader.git +cd AI-Trader +cp .env.example .env +# Edit .env and add your API keys + +# 2. Start API server +docker-compose up -d ai-trader-api + +# 3. Test API +curl http://localhost:8080/health + +# 4. Trigger simulation +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "config_path": "/app/configs/default_config.json", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-4"] + }' +``` + +See [DOCKER_API.md](DOCKER_API.md) for complete API documentation. + +#### ๐ŸŽฏ Batch Mode (One-time Simulation) +```bash +# Run single simulation +docker-compose --profile batch up ai-trader-batch + +# With custom config +docker-compose --profile batch run ai-trader-batch configs/custom.json +``` + +--- + +### ๐Ÿ’ป **Local Installation (Development)** + +#### ๐Ÿ“‹ Prerequisites + +- **Python 3.10+** - **API Keys**: OpenAI, Alpha Vantage, Jina AI +- **Optional**: Docker (for containerized deployment) -### โšก One-Click Installation +#### โšก Installation Steps ```bash # 1. Clone project diff --git a/TESTING_GUIDE.md b/TESTING_GUIDE.md new file mode 100644 index 0000000..028507d --- /dev/null +++ b/TESTING_GUIDE.md @@ -0,0 +1,513 @@ +# AI-Trader Testing & Validation Guide + +This guide provides step-by-step instructions for validating the AI-Trader Docker deployment. + +## Prerequisites + +- Docker Desktop installed and running +- `.env` file configured with API keys +- At least 2GB free disk space +- Internet connection for initial price data download + +## Quick Start + +```bash +# 1. Make scripts executable +chmod +x scripts/*.sh + +# 2. 
Validate Docker build +bash scripts/validate_docker_build.sh + +# 3. Test API endpoints +bash scripts/test_api_endpoints.sh + +# 4. Test batch mode +bash scripts/test_batch_mode.sh +``` + +--- + +## Detailed Testing Procedures + +### Test 1: Docker Build Validation + +**Purpose:** Verify Docker image builds correctly and containers start + +**Command:** +```bash +bash scripts/validate_docker_build.sh +``` + +**What it tests:** +- โœ… Docker and docker-compose installed +- โœ… Docker daemon running +- โœ… `.env` file exists and configured +- โœ… Image builds successfully +- โœ… Container starts in API mode +- โœ… Health endpoint responds +- โœ… No critical errors in logs + +**Expected output:** +``` +========================================== +AI-Trader Docker Build Validation +========================================== + +Step 1: Checking prerequisites... +โœ“ Docker is installed: Docker version 24.0.0 +โœ“ Docker daemon is running +โœ“ docker-compose is installed + +Step 2: Checking environment configuration... +โœ“ .env file exists +โœ“ OPENAI_API_KEY is set +โœ“ ALPHAADVANTAGE_API_KEY is set +โœ“ JINA_API_KEY is set + +Step 3: Building Docker image... +โœ“ Docker image built successfully + +Step 4: Verifying Docker image... +โœ“ Image size: 850MB +โœ“ Exposed ports: 8000/tcp 8001/tcp 8002/tcp 8003/tcp 8080/tcp 8888/tcp + +Step 5: Testing API mode startup... +โœ“ Container started successfully +โœ“ Container is running +โœ“ No critical errors in logs + +Step 6: Testing health endpoint... 
+โœ“ Health endpoint responding +Health response: {"status":"healthy","database":"connected","timestamp":"..."} +``` + +**If it fails:** +- Check Docker Desktop is running +- Verify `.env` has all required keys +- Check port 8080 is not already in use +- Review logs: `docker logs ai-trader-api` + +--- + +### Test 2: API Endpoint Testing + +**Purpose:** Validate all REST API endpoints work correctly + +**Command:** +```bash +# Ensure API is running first +docker-compose up -d ai-trader-api + +# Run tests +bash scripts/test_api_endpoints.sh +``` + +**What it tests:** +- โœ… GET /health - Service health check +- โœ… POST /simulate/trigger - Job creation +- โœ… GET /simulate/status/{job_id} - Status tracking +- โœ… Job completion monitoring +- โœ… GET /results - Results retrieval +- โœ… Query filtering (by date, model) +- โœ… Concurrent job prevention +- โœ… Error handling (invalid inputs) + +**Expected output:** +``` +========================================== +AI-Trader API Endpoint Testing +========================================== + +โœ“ API is accessible + +Test 1: GET /health +โœ“ Health check passed + +Test 2: POST /simulate/trigger +โœ“ Simulation triggered successfully +Job ID: 550e8400-e29b-41d4-a716-446655440000 + +Test 3: GET /simulate/status/{job_id} +โœ“ Job status retrieved +Job Status: pending + +Test 4: Monitoring job progress +[1/30] Status: running | Progress: {"completed":1,"failed":0,...} +... +โœ“ Job finished with status: completed + +Test 5: GET /results +โœ“ Results retrieved +Result count: 2 + +Test 6: GET /results?date=... +โœ“ Date-filtered results retrieved + +Test 7: GET /results?model=... 
+โœ“ Model-filtered results retrieved + +Test 8: Concurrent job prevention +โœ“ Concurrent job correctly rejected + +Test 9: Error handling +โœ“ Invalid config path correctly rejected +``` + +**If it fails:** +- Ensure container is running: `docker ps | grep ai-trader-api` +- Check API logs: `docker logs ai-trader-api` +- Verify port 8080 is accessible: `curl http://localhost:8080/health` +- Check MCP services started: `docker exec ai-trader-api ps aux | grep python` + +--- + +### Test 3: Batch Mode Testing + +**Purpose:** Verify one-time simulation execution works + +**Command:** +```bash +bash scripts/test_batch_mode.sh +``` + +**What it tests:** +- โœ… Batch mode container starts +- โœ… Simulation executes to completion +- โœ… Exit code is 0 (success) +- โœ… Position files created +- โœ… Log files generated +- โœ… Price data persists between runs + +**Expected output:** +``` +========================================== +AI-Trader Batch Mode Testing +========================================== + +โœ“ Prerequisites OK +Using config: configs/default_config.json + +Test 1: Building Docker image +โœ“ Image built successfully + +Test 2: Running batch simulation +๐Ÿš€ Starting AI-Trader... +โœ… Environment variables validated +๐Ÿ“Š Fetching and merging price data... +๐Ÿ”ง Starting MCP services... +๐Ÿค– Starting trading agent... +[Trading output...] 
+ +Test 3: Checking exit status +โœ“ Batch simulation completed successfully (exit code: 0) + +Test 4: Verifying output files +โœ“ Found 1 position file(s) +Sample position data: {...} +โœ“ Found 3 log file(s) + +Test 5: Checking price data +โœ“ Price data exists: 100 stocks + +Test 6: Testing data persistence +โœ“ Second run completed successfully +โœ“ Price data was reused +``` + +**If it fails:** +- Check `.env` has valid API keys +- Verify internet connection (for price data) +- Check available disk space +- Review batch logs: `docker logs ai-trader-batch` +- Check data directory permissions + +--- + +## Manual Testing Procedures + +### Test 1: API Health Check + +```bash +# Start API +docker-compose up -d ai-trader-api + +# Test health endpoint +curl http://localhost:8080/health + +# Expected response: +# {"status":"healthy","database":"connected","timestamp":"2025-01-16T10:00:00Z"} +``` + +### Test 2: Trigger Simulation + +```bash +# Trigger job +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "config_path": "/app/configs/default_config.json", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-4"] + }' + +# Expected response: +# { +# "job_id": "550e8400-e29b-41d4-a716-446655440000", +# "status": "pending", +# "total_model_days": 2, +# "message": "Simulation job ... created and started" +# } + +# Save job_id for next steps +JOB_ID="550e8400-e29b-41d4-a716-446655440000" +``` + +### Test 3: Monitor Job Progress + +```bash +# Check status (repeat until completed) +curl http://localhost:8080/simulate/status/$JOB_ID | jq '.' + +# Poll with watch +watch -n 10 "curl -s http://localhost:8080/simulate/status/$JOB_ID | jq '.status, .progress'" +``` + +### Test 4: Retrieve Results + +```bash +# Get all results for job +curl "http://localhost:8080/results?job_id=$JOB_ID" | jq '.' + +# Filter by date +curl "http://localhost:8080/results?date=2025-01-16" | jq '.' 
+ +# Filter by model +curl "http://localhost:8080/results?model=gpt-4" | jq '.' + +# Combine filters +curl "http://localhost:8080/results?job_id=$JOB_ID&date=2025-01-16&model=gpt-4" | jq '.' +``` + +### Test 5: Volume Persistence + +```bash +# Stop container +docker-compose down + +# Verify data persists +ls -lh data/jobs.db +ls -R data/agent_data + +# Restart container +docker-compose up -d ai-trader-api + +# Data should still be accessible via API +curl http://localhost:8080/results | jq '.count' +``` + +--- + +## Troubleshooting + +### Problem: Container won't start + +**Symptoms:** +- `docker ps` shows no ai-trader-api container +- Container exits immediately + +**Debug steps:** +```bash +# Check logs +docker logs ai-trader-api + +# Common issues: +# 1. Missing API keys in .env +# 2. Port 8080 already in use +# 3. Volume permission issues +``` + +**Solutions:** +```bash +# 1. Verify .env +cat .env | grep -E "OPENAI_API_KEY|ALPHAADVANTAGE_API_KEY|JINA_API_KEY" + +# 2. Check port usage +lsof -i :8080 # Linux/Mac +netstat -ano | findstr :8080 # Windows + +# 3. 
Fix permissions +chmod -R 755 data logs +``` + +### Problem: Health check fails + +**Symptoms:** +- `curl http://localhost:8080/health` returns error +- Container is running but API not responding + +**Debug steps:** +```bash +# Check if API process is running +docker exec ai-trader-api ps aux | grep uvicorn + +# Check internal health +docker exec ai-trader-api curl http://localhost:8080/health + +# Check logs for startup errors +docker logs ai-trader-api | grep -i error +``` + +**Solutions:** +```bash +# If MCP services didn't start: +docker exec ai-trader-api ps aux | grep python + +# If database issues: +docker exec ai-trader-api ls -l /app/data/jobs.db + +# Restart container +docker-compose restart ai-trader-api +``` + +### Problem: Job stays in "pending" status + +**Symptoms:** +- Job triggered but never progresses +- Status remains "pending" indefinitely + +**Debug steps:** +```bash +# Check worker logs +docker logs ai-trader-api | grep -i "worker\|simulation" + +# Check database +docker exec ai-trader-api sqlite3 /app/data/jobs.db "SELECT * FROM job_details;" + +# Check if MCP services are accessible +docker exec ai-trader-api curl http://localhost:8000/health +``` + +**Solutions:** +```bash +# Restart container (jobs resume automatically) +docker-compose restart ai-trader-api + +# Check specific job status +curl http://localhost:8080/simulate/status/$JOB_ID | jq '.details' +``` + +### Problem: Tests timeout + +**Symptoms:** +- `test_api_endpoints.sh` hangs during job monitoring +- Jobs take longer than expected + +**Solutions:** +```bash +# Increase poll timeout in test script +# Edit: MAX_POLLS=60 # Increase from 30 + +# Or monitor job manually +watch -n 30 "curl -s http://localhost:8080/simulate/status/$JOB_ID | jq '.status, .progress'" + +# Check agent logs for slowness +docker logs ai-trader-api | tail -100 +``` + +--- + +## Performance Benchmarks + +### Expected Execution Times + +**Docker Build:** +- First build: 5-10 minutes +- Subsequent builds: 1-2 
minutes (with cache) + +**API Startup:** +- Container start: 5-10 seconds +- Health check ready: 15-20 seconds (including MCP services) + +**Single Model-Day Simulation:** +- With existing price data: 2-5 minutes +- First run (fetching price data): 10-15 minutes + +**Complete 2-Date, 2-Model Job:** +- Expected duration: 10-20 minutes +- Depends on AI model response times + +--- + +## Continuous Monitoring + +### Health Check Monitoring + +```bash +# Add to cron for continuous monitoring +*/5 * * * * curl -f http://localhost:8080/health || echo "API down" | mail -s "AI-Trader Alert" admin@example.com +``` + +### Log Rotation + +```bash +# Docker handles log rotation, but monitor size: +docker logs ai-trader-api --tail 100 + +# To clear old logs, truncate the container's log file on the host. +# (Note: `docker logs ... > /dev/null` only READS the log stream; it does +# not delete anything.) +sudo truncate -s 0 $(docker inspect --format='{{.LogPath}}' ai-trader-api) +``` + +### Database Size + +```bash +# Monitor database growth +docker exec ai-trader-api du -h /app/data/jobs.db + +# Vacuum periodically +docker exec ai-trader-api sqlite3 /app/data/jobs.db "VACUUM;" +``` + +--- + +## Success Criteria + +### Validation Complete When: + +- ✅ All 3 test scripts pass without errors +- ✅ Health endpoint returns "healthy" status +- ✅ Can trigger and complete simulation job +- ✅ Results are retrievable via API +- ✅ Data persists after container restart +- ✅ Batch mode completes successfully +- ✅ No critical errors in logs + +### Ready for Production When: + +- ✅ All validation tests pass +- ✅ Performance meets expectations +- ✅ Monitoring is configured +- ✅ Backup strategy is in place +- ✅ Documentation is reviewed +- ✅ Team is trained on operations + +--- + +## Next Steps After Validation + +1. **Set up monitoring** - Configure health check alerts +2. **Configure backups** - Backup `/app/data` regularly +3. **Document operations** - Create runbook for team +4. **Set up CI/CD** - Automate testing and deployment +5. **Integrate with Windmill** - Connect workflows to API +6. 
"""
Database utilities and schema management for AI-Trader API.

This module provides:
- SQLite connection management
- Database schema initialization (6 tables)
- ACID-compliant transaction support
"""

import sqlite3
from pathlib import Path
from typing import Optional
import os


def get_db_connection(db_path: str = "data/jobs.db") -> sqlite3.Connection:
    """
    Get SQLite database connection with proper configuration.

    Args:
        db_path: Path to SQLite database file

    Returns:
        Configured SQLite connection

    Configuration:
        - Foreign keys enabled for referential integrity (SQLite leaves the
          pragma OFF by default, which would silently disable the
          ON DELETE CASCADE relationships used by the schema)
        - Row factory for dict-like access
        - check_same_thread disabled for FastAPI async compatibility; each
          caller still opens its own connection
    """
    # Ensure the parent directory exists before sqlite3 tries to create the file.
    db_path_obj = Path(db_path)
    db_path_obj.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path, check_same_thread=False)
    conn.execute("PRAGMA foreign_keys = ON")
    conn.row_factory = sqlite3.Row

    return conn


def initialize_database(db_path: str = "data/jobs.db") -> None:
    """
    Create all database tables with enhanced schema.

    Tables created:
    1. jobs - High-level job metadata and status
    2. job_details - Per model-day execution tracking
    3. positions - Trading positions and P&L metrics
    4. holdings - Portfolio holdings per position
    5. reasoning_logs - AI decision logs (optional, for detail=full)
    6. tool_usage - Tool usage statistics

    Idempotent: every statement uses IF NOT EXISTS, so calling this on an
    already-initialized database is a no-op.

    Args:
        db_path: Path to SQLite database file
    """
    conn = get_db_connection(db_path)
    cursor = conn.cursor()

    # Table 1: Jobs - Job metadata and lifecycle
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS jobs (
            job_id TEXT PRIMARY KEY,
            config_path TEXT NOT NULL,
            status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')),
            date_range TEXT NOT NULL,
            models TEXT NOT NULL,
            created_at TEXT NOT NULL,
            started_at TEXT,
            updated_at TEXT,
            completed_at TEXT,
            total_duration_seconds REAL,
            error TEXT
        )
    """)

    # Table 2: Job Details - Per model-day execution
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS job_details (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id TEXT NOT NULL,
            date TEXT NOT NULL,
            model TEXT NOT NULL,
            status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')),
            started_at TEXT,
            completed_at TEXT,
            duration_seconds REAL,
            error TEXT,
            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
        )
    """)

    # Table 3: Positions - Trading positions and P&L
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS positions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id TEXT NOT NULL,
            date TEXT NOT NULL,
            model TEXT NOT NULL,
            action_id INTEGER NOT NULL,
            action_type TEXT CHECK(action_type IN ('buy', 'sell', 'no_trade')),
            symbol TEXT,
            amount INTEGER,
            price REAL,
            cash REAL NOT NULL,
            portfolio_value REAL NOT NULL,
            daily_profit REAL,
            daily_return_pct REAL,
            cumulative_profit REAL,
            cumulative_return_pct REAL,
            created_at TEXT NOT NULL,
            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
        )
    """)

    # Table 4: Holdings - Portfolio holdings
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS holdings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            position_id INTEGER NOT NULL,
            symbol TEXT NOT NULL,
            quantity INTEGER NOT NULL,
            FOREIGN KEY (position_id) REFERENCES positions(id) ON DELETE CASCADE
        )
    """)

    # Table 5: Reasoning Logs - AI decision logs (optional)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS reasoning_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id TEXT NOT NULL,
            date TEXT NOT NULL,
            model TEXT NOT NULL,
            step_number INTEGER NOT NULL,
            timestamp TEXT NOT NULL,
            role TEXT CHECK(role IN ('user', 'assistant', 'tool')),
            content TEXT,
            tool_name TEXT,
            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
        )
    """)

    # Table 6: Tool Usage - Tool usage statistics
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS tool_usage (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id TEXT NOT NULL,
            date TEXT NOT NULL,
            model TEXT NOT NULL,
            tool_name TEXT NOT NULL,
            call_count INTEGER NOT NULL DEFAULT 1,
            total_duration_seconds REAL,
            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
        )
    """)

    # Create indexes for performance
    _create_indexes(cursor)

    conn.commit()
    conn.close()


def _create_indexes(cursor: sqlite3.Cursor) -> None:
    """Create database indexes for query performance (all IF NOT EXISTS)."""

    # Jobs table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC)
    """)

    # Job details table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_job_details_job_id ON job_details(job_id)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_job_details_status ON job_details(status)
    """)
    # One row per (job, date, model): enforced at the index level.
    cursor.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_job_details_unique
        ON job_details(job_id, date, model)
    """)

    # Positions table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_positions_job_id ON positions(job_id)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_positions_date ON positions(date)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_positions_model ON positions(model)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_positions_date_model ON positions(date, model)
    """)
    cursor.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_positions_unique
        ON positions(job_id, date, model, action_id)
    """)

    # Holdings table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_holdings_position_id ON holdings(position_id)
    """)
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_holdings_symbol ON holdings(symbol)
    """)

    # Reasoning logs table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_reasoning_logs_job_date_model
        ON reasoning_logs(job_id, date, model)
    """)

    # Tool usage table indexes
    cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_tool_usage_job_date_model
        ON tool_usage(job_id, date, model)
    """)


def drop_all_tables(db_path: str = "data/jobs.db") -> None:
    """
    Drop all database tables. USE WITH CAUTION.

    This is primarily for testing and development.

    Args:
        db_path: Path to SQLite database file
    """
    conn = get_db_connection(db_path)
    cursor = conn.cursor()

    # Child tables first; harmless with cascades but keeps the drop order
    # valid even if foreign_keys enforcement is active.
    tables = [
        'tool_usage',
        'reasoning_logs',
        'holdings',
        'positions',
        'job_details',
        'jobs'
    ]

    for table in tables:
        cursor.execute(f"DROP TABLE IF EXISTS {table}")

    conn.commit()
    conn.close()


def vacuum_database(db_path: str = "data/jobs.db") -> None:
    """
    Reclaim disk space after deletions.

    Should be run periodically after cleanup operations.

    Args:
        db_path: Path to SQLite database file
    """
    conn = get_db_connection(db_path)
    conn.execute("VACUUM")
    conn.close()


def get_database_stats(db_path: str = "data/jobs.db") -> dict:
    """
    Get database statistics for monitoring.

    Returns:
        Dictionary with table row counts and database size

    Example:
        {
            "database_size_mb": 12.5,
            "jobs": 150,
            "job_details": 3000,
            "positions": 15000,
            "holdings": 45000,
            "reasoning_logs": 300000,
            "tool_usage": 12000
        }
    """
    conn = get_db_connection(db_path)
    cursor = conn.cursor()

    stats = {}

    # Get database file size
    if os.path.exists(db_path):
        size_bytes = os.path.getsize(db_path)
        stats["database_size_mb"] = round(size_bytes / (1024 * 1024), 2)
    else:
        stats["database_size_mb"] = 0

    # Fixed allowlist of expected tables (safe to interpolate into SQL).
    tables = ['jobs', 'job_details', 'positions', 'holdings', 'reasoning_logs', 'tool_usage']

    # FIX: previously this COUNTed each table unconditionally, which raised
    # sqlite3.OperationalError on a fresh/uninitialized database. Check
    # sqlite_master first and report 0 for any missing table instead.
    cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
    existing_tables = {row[0] for row in cursor.fetchall()}

    for table in tables:
        if table in existing_tables:
            cursor.execute(f"SELECT COUNT(*) FROM {table}")
            stats[table] = cursor.fetchone()[0]
        else:
            stats[table] = 0

    conn.close()

    return stats
+ + Responsibilities: + - Create jobs with date ranges and model lists + - Track job status (pending โ†’ running โ†’ completed/partial/failed) + - Monitor progress across model-days + - Enforce single-job concurrency + - Provide job queries and retrieval + - Cleanup old jobs + + State Machine: + pending โ†’ running โ†’ completed (all succeeded) + โ†’ partial (some failed) + โ†’ failed (job-level error) + """ + + def __init__(self, db_path: str = "data/jobs.db"): + """ + Initialize JobManager. + + Args: + db_path: Path to SQLite database + """ + self.db_path = db_path + + def create_job( + self, + config_path: str, + date_range: List[str], + models: List[str] + ) -> str: + """ + Create new simulation job. + + Args: + config_path: Path to configuration file + date_range: List of dates to simulate (YYYY-MM-DD) + models: List of model signatures to execute + + Returns: + job_id: UUID of created job + + Raises: + ValueError: If another job is already running/pending + """ + if not self.can_start_new_job(): + raise ValueError("Another simulation job is already running or pending") + + job_id = str(uuid.uuid4()) + created_at = datetime.utcnow().isoformat() + "Z" + + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + # Insert job + cursor.execute(""" + INSERT INTO jobs ( + job_id, config_path, status, date_range, models, created_at + ) + VALUES (?, ?, ?, ?, ?, ?) + """, ( + job_id, + config_path, + "pending", + json.dumps(date_range), + json.dumps(models), + created_at + )) + + # Create job_details for each model-day combination + for date in date_range: + for model in models: + cursor.execute(""" + INSERT INTO job_details ( + job_id, date, model, status + ) + VALUES (?, ?, ?, ?) 
+ """, (job_id, date, model, "pending")) + + conn.commit() + logger.info(f"Created job {job_id} with {len(date_range)} dates and {len(models)} models") + + return job_id + + finally: + conn.close() + + def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: + """ + Get job by ID. + + Args: + job_id: Job UUID + + Returns: + Job data dict or None if not found + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT + job_id, config_path, status, date_range, models, + created_at, started_at, updated_at, completed_at, + total_duration_seconds, error + FROM jobs + WHERE job_id = ? + """, (job_id,)) + + row = cursor.fetchone() + if not row: + return None + + return { + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "updated_at": row[7], + "completed_at": row[8], + "total_duration_seconds": row[9], + "error": row[10] + } + + finally: + conn.close() + + def get_current_job(self) -> Optional[Dict[str, Any]]: + """ + Get most recent job. 
+ + Returns: + Most recent job data or None if no jobs exist + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT + job_id, config_path, status, date_range, models, + created_at, started_at, updated_at, completed_at, + total_duration_seconds, error + FROM jobs + ORDER BY created_at DESC + LIMIT 1 + """) + + row = cursor.fetchone() + if not row: + return None + + return { + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "updated_at": row[7], + "completed_at": row[8], + "total_duration_seconds": row[9], + "error": row[10] + } + + finally: + conn.close() + + def find_job_by_date_range(self, date_range: List[str]) -> Optional[Dict[str, Any]]: + """ + Find job with matching date range. + + Args: + date_range: List of dates to match + + Returns: + Job data or None if not found + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + date_range_json = json.dumps(date_range) + + cursor.execute(""" + SELECT + job_id, config_path, status, date_range, models, + created_at, started_at, updated_at, completed_at, + total_duration_seconds, error + FROM jobs + WHERE date_range = ? + ORDER BY created_at DESC + LIMIT 1 + """, (date_range_json,)) + + row = cursor.fetchone() + if not row: + return None + + return { + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "updated_at": row[7], + "completed_at": row[8], + "total_duration_seconds": row[9], + "error": row[10] + } + + finally: + conn.close() + + def update_job_status( + self, + job_id: str, + status: str, + error: Optional[str] = None + ) -> None: + """ + Update job status. 
+ + Args: + job_id: Job UUID + status: New status (pending/running/completed/partial/failed) + error: Optional error message + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + updated_at = datetime.utcnow().isoformat() + "Z" + + # Set timestamps based on status + if status == "running": + cursor.execute(""" + UPDATE jobs + SET status = ?, started_at = ?, updated_at = ? + WHERE job_id = ? + """, (status, updated_at, updated_at, job_id)) + + elif status in ("completed", "partial", "failed"): + # Calculate duration + cursor.execute(""" + SELECT started_at FROM jobs WHERE job_id = ? + """, (job_id,)) + + row = cursor.fetchone() + duration_seconds = None + + if row and row[0]: + started_at = datetime.fromisoformat(row[0].replace("Z", "")) + completed_at = datetime.fromisoformat(updated_at.replace("Z", "")) + duration_seconds = (completed_at - started_at).total_seconds() + + cursor.execute(""" + UPDATE jobs + SET status = ?, completed_at = ?, updated_at = ?, + total_duration_seconds = ?, error = ? + WHERE job_id = ? + """, (status, updated_at, updated_at, duration_seconds, error, job_id)) + + else: + # Just update status + cursor.execute(""" + UPDATE jobs + SET status = ?, updated_at = ?, error = ? + WHERE job_id = ? + """, (status, updated_at, error, job_id)) + + conn.commit() + logger.debug(f"Updated job {job_id} status to {status}") + + finally: + conn.close() + + def update_job_detail_status( + self, + job_id: str, + date: str, + model: str, + status: str, + error: Optional[str] = None + ) -> None: + """ + Update model-day status and auto-update job status. 
+ + Args: + job_id: Job UUID + date: Trading date (YYYY-MM-DD) + model: Model signature + status: New status (pending/running/completed/failed) + error: Optional error message + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + updated_at = datetime.utcnow().isoformat() + "Z" + + if status == "running": + cursor.execute(""" + UPDATE job_details + SET status = ?, started_at = ? + WHERE job_id = ? AND date = ? AND model = ? + """, (status, updated_at, job_id, date, model)) + + # Update job to running if not already + cursor.execute(""" + UPDATE jobs + SET status = 'running', started_at = COALESCE(started_at, ?), updated_at = ? + WHERE job_id = ? AND status = 'pending' + """, (updated_at, updated_at, job_id)) + + elif status in ("completed", "failed"): + # Calculate duration for detail + cursor.execute(""" + SELECT started_at FROM job_details + WHERE job_id = ? AND date = ? AND model = ? + """, (job_id, date, model)) + + row = cursor.fetchone() + duration_seconds = None + + if row and row[0]: + started_at = datetime.fromisoformat(row[0].replace("Z", "")) + completed_at = datetime.fromisoformat(updated_at.replace("Z", "")) + duration_seconds = (completed_at - started_at).total_seconds() + + cursor.execute(""" + UPDATE job_details + SET status = ?, completed_at = ?, duration_seconds = ?, error = ? + WHERE job_id = ? AND date = ? AND model = ? + """, (status, updated_at, duration_seconds, error, job_id, date, model)) + + # Check if all details are done + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed + FROM job_details + WHERE job_id = ? 
+ """, (job_id,)) + + total, completed, failed = cursor.fetchone() + + if completed + failed == total: + # All done - determine final status + if failed == 0: + final_status = "completed" + elif completed > 0: + final_status = "partial" + else: + final_status = "failed" + + # Calculate job duration + cursor.execute(""" + SELECT started_at FROM jobs WHERE job_id = ? + """, (job_id,)) + + row = cursor.fetchone() + job_duration = None + + if row and row[0]: + started_at = datetime.fromisoformat(row[0].replace("Z", "")) + completed_at = datetime.fromisoformat(updated_at.replace("Z", "")) + job_duration = (completed_at - started_at).total_seconds() + + cursor.execute(""" + UPDATE jobs + SET status = ?, completed_at = ?, updated_at = ?, total_duration_seconds = ? + WHERE job_id = ? + """, (final_status, updated_at, updated_at, job_duration, job_id)) + + conn.commit() + logger.debug(f"Updated job_detail {job_id}/{date}/{model} to {status}") + + finally: + conn.close() + + def get_job_details(self, job_id: str) -> List[Dict[str, Any]]: + """ + Get all model-day execution details for a job. + + Args: + job_id: Job UUID + + Returns: + List of job_detail records with date, model, status, error + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT date, model, status, error, started_at, completed_at, duration_seconds + FROM job_details + WHERE job_id = ? + ORDER BY date, model + """, (job_id,)) + + rows = cursor.fetchall() + + details = [] + for row in rows: + details.append({ + "date": row[0], + "model": row[1], + "status": row[2], + "error": row[3], + "started_at": row[4], + "completed_at": row[5], + "duration_seconds": row[6] + }) + + return details + + finally: + conn.close() + + def get_job_progress(self, job_id: str) -> Dict[str, Any]: + """ + Get job progress summary. 
+ + Args: + job_id: Job UUID + + Returns: + Progress dict with total_model_days, completed, failed, current, details + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed + FROM job_details + WHERE job_id = ? + """, (job_id,)) + + total, completed, failed = cursor.fetchone() + + # Get currently running model-day + cursor.execute(""" + SELECT date, model + FROM job_details + WHERE job_id = ? AND status = 'running' + LIMIT 1 + """, (job_id,)) + + current_row = cursor.fetchone() + current = {"date": current_row[0], "model": current_row[1]} if current_row else None + + # Get all details + cursor.execute(""" + SELECT date, model, status, duration_seconds, error + FROM job_details + WHERE job_id = ? + ORDER BY date, model + """, (job_id,)) + + details = [] + for row in cursor.fetchall(): + details.append({ + "date": row[0], + "model": row[1], + "status": row[2], + "duration_seconds": row[3], + "error": row[4] + }) + + return { + "total_model_days": total, + "completed": completed or 0, + "failed": failed or 0, + "current": current, + "details": details + } + + finally: + conn.close() + + def can_start_new_job(self) -> bool: + """ + Check if new job can be started. + + Returns: + True if no jobs are pending/running, False otherwise + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT COUNT(*) + FROM jobs + WHERE status IN ('pending', 'running') + """) + + count = cursor.fetchone()[0] + return count == 0 + + finally: + conn.close() + + def get_running_jobs(self) -> List[Dict[str, Any]]: + """ + Get all running/pending jobs. 
+ + Returns: + List of job dicts + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT + job_id, config_path, status, date_range, models, + created_at, started_at, updated_at, completed_at, + total_duration_seconds, error + FROM jobs + WHERE status IN ('pending', 'running') + ORDER BY created_at DESC + """) + + jobs = [] + for row in cursor.fetchall(): + jobs.append({ + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "updated_at": row[7], + "completed_at": row[8], + "total_duration_seconds": row[9], + "error": row[10] + }) + + return jobs + + finally: + conn.close() + + def cleanup_old_jobs(self, days: int = 30) -> Dict[str, int]: + """ + Delete jobs older than threshold. + + Args: + days: Delete jobs older than this many days + + Returns: + Dict with jobs_deleted count + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cutoff_date = (datetime.utcnow() - timedelta(days=days)).isoformat() + "Z" + + # Get count before deletion + cursor.execute(""" + SELECT COUNT(*) + FROM jobs + WHERE created_at < ? AND status IN ('completed', 'partial', 'failed') + """, (cutoff_date,)) + + count = cursor.fetchone()[0] + + # Delete old jobs (foreign key cascade will delete related records) + cursor.execute(""" + DELETE FROM jobs + WHERE created_at < ? AND status IN ('completed', 'partial', 'failed') + """, (cutoff_date,)) + + conn.commit() + logger.info(f"Cleaned up {count} jobs older than {days} days") + + return {"jobs_deleted": count} + + finally: + conn.close() diff --git a/api/main.py b/api/main.py new file mode 100644 index 0000000..eac2550 --- /dev/null +++ b/api/main.py @@ -0,0 +1,366 @@ +""" +FastAPI REST API for AI-Trader simulation service. 
import logging
from typing import Optional, List, Dict, Any
from datetime import datetime
from pathlib import Path

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, field_validator

from api.job_manager import JobManager
from api.simulation_worker import SimulationWorker
from api.database import get_db_connection
import threading
import time

logger = logging.getLogger(__name__)


# Pydantic models for request/response validation
class SimulateTriggerRequest(BaseModel):
    """Request body for POST /simulate/trigger."""
    config_path: str = Field(..., description="Path to configuration file")
    date_range: List[str] = Field(..., min_length=1, description="List of trading dates (YYYY-MM-DD)")
    models: List[str] = Field(..., min_length=1, description="List of model signatures to simulate")

    @field_validator("date_range")
    @classmethod
    def validate_date_range(cls, v):
        """Reject any date not in strict YYYY-MM-DD format."""
        for date in v:
            try:
                datetime.strptime(date, "%Y-%m-%d")
            except ValueError:
                raise ValueError(f"Invalid date format: {date}. Expected YYYY-MM-DD")
        return v


class SimulateTriggerResponse(BaseModel):
    """Response body for POST /simulate/trigger."""
    job_id: str
    status: str
    total_model_days: int
    message: str


class JobProgress(BaseModel):
    """Job progress information."""
    total_model_days: int
    completed: int
    failed: int
    pending: int


class JobStatusResponse(BaseModel):
    """Response body for GET /simulate/status/{job_id}."""
    job_id: str
    status: str
    progress: JobProgress
    date_range: List[str]
    models: List[str]
    created_at: str
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
    total_duration_seconds: Optional[float] = None
    error: Optional[str] = None
    details: List[Dict[str, Any]]


class HealthResponse(BaseModel):
    """Response body for GET /health."""
    status: str
    database: str
    timestamp: str


def create_app(db_path: str = "data/jobs.db") -> FastAPI:
    """
    Create FastAPI application instance.

    Args:
        db_path: Path to SQLite database

    Returns:
        Configured FastAPI app
    """
    app = FastAPI(
        title="AI-Trader Simulation API",
        description="REST API for triggering and monitoring AI trading simulations",
        version="1.0.0"
    )

    # Store db_path in app state so endpoints (and tests) share one setting.
    app.state.db_path = db_path

    @app.post("/simulate/trigger", response_model=SimulateTriggerResponse, status_code=200)
    async def trigger_simulation(request: SimulateTriggerRequest):
        """
        Trigger a new simulation job.

        Creates a job with specified config, dates, and models.
        Job runs asynchronously in background thread.

        Raises:
            HTTPException 400: If another job is already running or config invalid
            HTTPException 422: If request validation fails
        """
        try:
            # Validate config path exists
            if not Path(request.config_path).exists():
                raise HTTPException(
                    status_code=400,
                    detail=f"Config path does not exist: {request.config_path}"
                )

            job_manager = JobManager(db_path=app.state.db_path)

            # Pre-check for a friendlier message; create_job re-checks and
            # raises ValueError itself, which also maps to 400 below.
            if not job_manager.can_start_new_job():
                raise HTTPException(
                    status_code=400,
                    detail="Another simulation job is already running or pending. Please wait for it to complete."
                )

            # Create job
            job_id = job_manager.create_job(
                config_path=request.config_path,
                date_range=request.date_range,
                models=request.models
            )

            # Start worker in background thread (only if not in test mode)
            if not getattr(app.state, "test_mode", False):
                def run_worker():
                    worker = SimulationWorker(job_id=job_id, db_path=app.state.db_path)
                    worker.run()

                thread = threading.Thread(target=run_worker, daemon=True)
                thread.start()

            logger.info(f"Triggered simulation job {job_id}")

            return SimulateTriggerResponse(
                job_id=job_id,
                status="pending",
                total_model_days=len(request.date_range) * len(request.models),
                message=f"Simulation job {job_id} created and started"
            )

        except HTTPException:
            raise
        except ValueError as e:
            logger.error(f"Validation error: {e}")
            raise HTTPException(status_code=400, detail=str(e))
        except Exception as e:
            logger.error(f"Failed to trigger simulation: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

    @app.get("/simulate/status/{job_id}", response_model=JobStatusResponse)
    async def get_job_status(job_id: str):
        """
        Get status and progress of a simulation job.

        Args:
            job_id: Job UUID

        Returns:
            Job status, progress, and model-day details

        Raises:
            HTTPException 404: If job not found
        """
        try:
            job_manager = JobManager(db_path=app.state.db_path)

            job = job_manager.get_job(job_id)
            if not job:
                raise HTTPException(status_code=404, detail=f"Job {job_id} not found")

            progress = job_manager.get_job_progress(job_id)
            details = job_manager.get_job_details(job_id)

            # Pending is whatever is neither completed nor failed yet.
            pending = progress["total_model_days"] - progress["completed"] - progress["failed"]

            return JobStatusResponse(
                job_id=job["job_id"],
                status=job["status"],
                progress=JobProgress(
                    total_model_days=progress["total_model_days"],
                    completed=progress["completed"],
                    failed=progress["failed"],
                    pending=pending
                ),
                date_range=job["date_range"],
                models=job["models"],
                created_at=job["created_at"],
                started_at=job.get("started_at"),
                completed_at=job.get("completed_at"),
                total_duration_seconds=job.get("total_duration_seconds"),
                error=job.get("error"),
                details=details
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Failed to get job status: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

    @app.get("/results")
    async def get_results(
        job_id: Optional[str] = Query(None, description="Filter by job ID"),
        date: Optional[str] = Query(None, description="Filter by date (YYYY-MM-DD)"),
        model: Optional[str] = Query(None, description="Filter by model signature")
    ):
        """
        Query simulation results.

        Supports filtering by job_id, date, and/or model.
        Returns position data with holdings.

        Args:
            job_id: Optional job UUID filter
            date: Optional date filter (YYYY-MM-DD)
            model: Optional model signature filter

        Returns:
            List of position records with holdings
        """
        try:
            conn = get_db_connection(app.state.db_path)
            # FIX: the connection was previously closed only on the success
            # path, leaking it whenever a query raised. try/finally guarantees
            # the close.
            try:
                cursor = conn.cursor()

                # Filters are appended as parameterized clauses (no SQL injection).
                query = """
                    SELECT
                        p.id,
                        p.job_id,
                        p.date,
                        p.model,
                        p.action_id,
                        p.action_type,
                        p.symbol,
                        p.amount,
                        p.price,
                        p.cash,
                        p.portfolio_value,
                        p.daily_profit,
                        p.daily_return_pct,
                        p.created_at
                    FROM positions p
                    WHERE 1=1
                """
                params = []

                if job_id:
                    query += " AND p.job_id = ?"
                    params.append(job_id)

                if date:
                    query += " AND p.date = ?"
                    params.append(date)

                if model:
                    query += " AND p.model = ?"
                    params.append(model)

                query += " ORDER BY p.date, p.model, p.action_id"

                cursor.execute(query, params)
                rows = cursor.fetchall()

                results = []
                for row in rows:
                    position_id = row[0]

                    # Attach the holdings breakdown for each position.
                    cursor.execute("""
                        SELECT symbol, quantity
                        FROM holdings
                        WHERE position_id = ?
                        ORDER BY symbol
                    """, (position_id,))

                    holdings = [{"symbol": h[0], "quantity": h[1]} for h in cursor.fetchall()]

                    results.append({
                        "id": row[0],
                        "job_id": row[1],
                        "date": row[2],
                        "model": row[3],
                        "action_id": row[4],
                        "action_type": row[5],
                        "symbol": row[6],
                        "amount": row[7],
                        "price": row[8],
                        "cash": row[9],
                        "portfolio_value": row[10],
                        "daily_profit": row[11],
                        "daily_return_pct": row[12],
                        "created_at": row[13],
                        "holdings": holdings
                    })

                return {"results": results, "count": len(results)}

            finally:
                conn.close()

        except Exception as e:
            logger.error(f"Failed to query results: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

    @app.get("/health", response_model=HealthResponse)
    async def health_check():
        """
        Health check endpoint.

        Verifies database connectivity and service status.

        Returns:
            Health status and timestamp
        """
        try:
            # Cheapest possible round-trip to prove the database is reachable.
            conn = get_db_connection(app.state.db_path)
            cursor = conn.cursor()
            cursor.execute("SELECT 1")
            cursor.fetchone()
            conn.close()

            database_status = "connected"

        except Exception as e:
            logger.error(f"Database health check failed: {e}")
            database_status = "disconnected"

        return HealthResponse(
            status="healthy" if database_status == "connected" else "unhealthy",
            database=database_status,
            timestamp=datetime.utcnow().isoformat() + "Z"
        )

    return app


# Create default app instance
app = create_app()


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)
execute() โ†’ Run agent, write results, update status + 3. cleanup โ†’ Delete runtime config + """ + + def __init__( + self, + job_id: str, + date: str, + model_sig: str, + config_path: str, + db_path: str = "data/jobs.db", + data_dir: str = "data" + ): + """ + Initialize ModelDayExecutor. + + Args: + job_id: Job UUID + date: Trading date (YYYY-MM-DD) + model_sig: Model signature + config_path: Path to configuration file + db_path: Path to SQLite database + data_dir: Data directory for runtime configs + """ + self.job_id = job_id + self.date = date + self.model_sig = model_sig + self.config_path = config_path + self.db_path = db_path + self.data_dir = data_dir + + # Create isolated runtime config + self.runtime_manager = RuntimeConfigManager(data_dir=data_dir) + self.runtime_config_path = self.runtime_manager.create_runtime_config( + job_id=job_id, + model_sig=model_sig, + date=date + ) + + self.job_manager = JobManager(db_path=db_path) + + logger.info(f"Initialized executor for {model_sig} on {date} (job: {job_id})") + + def execute(self) -> Dict[str, Any]: + """ + Execute trading session and persist results. + + Returns: + Result dict with success status and metadata + + Process: + 1. Update job_detail status to 'running' + 2. Initialize and run trading agent + 3. Write results to SQLite + 4. Update job_detail status to 'completed' or 'failed' + 5. 
Cleanup runtime config + + SQLite writes: + - positions: Trading position record + - holdings: Portfolio holdings breakdown + - reasoning_logs: AI reasoning steps (if available) + - tool_usage: Tool usage statistics (if available) + """ + try: + # Update status to running + self.job_manager.update_job_detail_status( + self.job_id, + self.date, + self.model_sig, + "running" + ) + + # Set environment variable for agent to use isolated config + os.environ["RUNTIME_ENV_PATH"] = self.runtime_config_path + + # Initialize agent + agent = self._initialize_agent() + + # Run trading session + logger.info(f"Running trading session for {self.model_sig} on {self.date}") + session_result = agent.run_trading_session(self.date) + + # Persist results to SQLite + self._write_results_to_db(agent, session_result) + + # Update status to completed + self.job_manager.update_job_detail_status( + self.job_id, + self.date, + self.model_sig, + "completed" + ) + + logger.info(f"Successfully completed {self.model_sig} on {self.date}") + + return { + "success": True, + "job_id": self.job_id, + "date": self.date, + "model": self.model_sig, + "session_result": session_result + } + + except Exception as e: + error_msg = f"Execution failed: {str(e)}" + logger.error(f"{self.model_sig} on {self.date}: {error_msg}", exc_info=True) + + # Update status to failed + self.job_manager.update_job_detail_status( + self.job_id, + self.date, + self.model_sig, + "failed", + error=error_msg + ) + + return { + "success": False, + "job_id": self.job_id, + "date": self.date, + "model": self.model_sig, + "error": error_msg + } + + finally: + # Always cleanup runtime config + self.runtime_manager.cleanup_runtime_config(self.runtime_config_path) + + def _initialize_agent(self): + """ + Initialize trading agent with config. 
+ + Returns: + Configured BaseAgent instance + """ + # Lazy import to avoid loading heavy dependencies during testing + from agent.base_agent.base_agent import BaseAgent + + # Load config + import json + with open(self.config_path, 'r') as f: + config = json.load(f) + + # Find model config + model_config = None + for model in config.get("models", []): + if model.get("signature") == self.model_sig: + model_config = model + break + + if not model_config: + raise ValueError(f"Model {self.model_sig} not found in config") + + # Initialize agent + agent = BaseAgent( + model_name=model_config.get("basemodel"), + signature=self.model_sig, + config=config + ) + + # Register agent (creates initial position if needed) + agent.register_agent() + + return agent + + def _write_results_to_db(self, agent, session_result: Dict[str, Any]) -> None: + """ + Write execution results to SQLite. + + Args: + agent: Trading agent instance + session_result: Result from run_trading_session() + + Writes to: + - positions: Position record with action and P&L + - holdings: Current portfolio holdings + - reasoning_logs: AI reasoning steps (if available) + - tool_usage: Tool usage stats (if available) + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + # Get current positions and trade info + positions = agent.get_positions() if hasattr(agent, 'get_positions') else {} + last_trade = agent.get_last_trade() if hasattr(agent, 'get_last_trade') else None + + # Calculate portfolio value + current_prices = agent.get_current_prices() if hasattr(agent, 'get_current_prices') else {} + total_value = self._calculate_portfolio_value(positions, current_prices) + + # Get previous value for P&L calculation + cursor.execute(""" + SELECT portfolio_value + FROM positions + WHERE job_id = ? AND model = ? AND date < ? 
+ ORDER BY date DESC + LIMIT 1 + """, (self.job_id, self.model_sig, self.date)) + + row = cursor.fetchone() + previous_value = row[0] if row else 10000.0 # Initial portfolio value + + daily_profit = total_value - previous_value + daily_return_pct = (daily_profit / previous_value * 100) if previous_value > 0 else 0 + + # Determine action_id (sequence number for this model) + cursor.execute(""" + SELECT COALESCE(MAX(action_id), 0) + 1 + FROM positions + WHERE job_id = ? AND model = ? + """, (self.job_id, self.model_sig)) + + action_id = cursor.fetchone()[0] + + # Insert position record + action_type = last_trade.get("action") if last_trade else "no_trade" + symbol = last_trade.get("symbol") if last_trade else None + amount = last_trade.get("amount") if last_trade else None + price = last_trade.get("price") if last_trade else None + cash = positions.get("CASH", 0.0) + + from datetime import datetime + created_at = datetime.utcnow().isoformat() + "Z" + + cursor.execute(""" + INSERT INTO positions ( + job_id, date, model, action_id, action_type, symbol, + amount, price, cash, portfolio_value, daily_profit, daily_return_pct, created_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + self.job_id, self.date, self.model_sig, action_id, action_type, + symbol, amount, price, cash, total_value, + daily_profit, daily_return_pct, created_at + )) + + position_id = cursor.lastrowid + + # Insert holdings + for symbol, quantity in positions.items(): + cursor.execute(""" + INSERT INTO holdings (position_id, symbol, quantity) + VALUES (?, ?, ?) + """, (position_id, symbol, float(quantity))) + + # Insert reasoning logs (if available) + if hasattr(agent, 'get_reasoning_steps'): + reasoning_steps = agent.get_reasoning_steps() + for step in reasoning_steps: + cursor.execute(""" + INSERT INTO reasoning_logs ( + job_id, date, model, step_number, timestamp, content + ) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, ( + self.job_id, self.date, self.model_sig, + step.get("step"), created_at, step.get("reasoning") + )) + + # Insert tool usage (if available) + if hasattr(agent, 'get_tool_usage') and hasattr(agent, 'get_tool_usage'): + tool_usage = agent.get_tool_usage() + for tool_name, count in tool_usage.items(): + cursor.execute(""" + INSERT INTO tool_usage ( + job_id, date, model, tool_name, call_count + ) + VALUES (?, ?, ?, ?, ?) + """, (self.job_id, self.date, self.model_sig, tool_name, count)) + + conn.commit() + logger.debug(f"Wrote results to DB for {self.model_sig} on {self.date}") + + finally: + conn.close() + + def _calculate_portfolio_value( + self, + positions: Dict[str, float], + current_prices: Dict[str, float] + ) -> float: + """ + Calculate total portfolio value. + + Args: + positions: Current holdings (symbol: quantity) + current_prices: Current market prices (symbol: price) + + Returns: + Total portfolio value in dollars + """ + total = 0.0 + + for symbol, quantity in positions.items(): + if symbol == "CASH": + total += quantity + else: + price = current_prices.get(symbol, 0.0) + total += quantity * price + + return total diff --git a/api/models.py b/api/models.py new file mode 100644 index 0000000..4e27e4b --- /dev/null +++ b/api/models.py @@ -0,0 +1,459 @@ +""" +Pydantic data models for AI-Trader API. 
+ +This module defines: +- Request models (input validation) +- Response models (output serialization) +- Nested models for complex data structures +""" + +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Literal, Any +from datetime import datetime + + +# ==================== Request Models ==================== + +class TriggerSimulationRequest(BaseModel): + """Request model for POST /simulate/trigger endpoint.""" + + config_path: str = Field( + default="configs/default_config.json", + description="Path to configuration file" + ) + + class Config: + json_schema_extra = { + "example": { + "config_path": "configs/default_config.json" + } + } + + +class ResultsQueryParams(BaseModel): + """Query parameters for GET /results endpoint.""" + + date: str = Field( + ..., + pattern=r"^\d{4}-\d{2}-\d{2}$", + description="Date in YYYY-MM-DD format" + ) + model: Optional[str] = Field( + None, + description="Model signature filter (optional)" + ) + detail: Literal["minimal", "full"] = Field( + default="minimal", + description="Response detail level" + ) + + class Config: + json_schema_extra = { + "example": { + "date": "2025-01-16", + "model": "gpt-5", + "detail": "minimal" + } + } + + +# ==================== Nested Response Models ==================== + +class JobProgress(BaseModel): + """Progress tracking for simulation jobs.""" + + total_model_days: int = Field( + ..., + description="Total number of model-days to execute" + ) + completed: int = Field( + ..., + description="Number of model-days completed" + ) + failed: int = Field( + ..., + description="Number of model-days that failed" + ) + current: Optional[Dict[str, str]] = Field( + None, + description="Currently executing model-day (if any)" + ) + details: Optional[List[Dict]] = Field( + None, + description="Detailed progress for each model-day" + ) + + class Config: + json_schema_extra = { + "example": { + "total_model_days": 4, + "completed": 2, + "failed": 0, + "current": {"date": 
"2025-01-16", "model": "gpt-5"}, + "details": [ + { + "date": "2025-01-16", + "model": "gpt-5", + "status": "completed", + "duration_seconds": 45.2 + } + ] + } + } + + +class DailyPnL(BaseModel): + """Daily profit and loss metrics.""" + + profit: float = Field( + ..., + description="Daily profit in dollars" + ) + return_pct: float = Field( + ..., + description="Daily return percentage" + ) + portfolio_value: float = Field( + ..., + description="Total portfolio value" + ) + + class Config: + json_schema_extra = { + "example": { + "profit": 150.50, + "return_pct": 1.51, + "portfolio_value": 10150.50 + } + } + + +class Trade(BaseModel): + """Individual trade record.""" + + id: int = Field( + ..., + description="Trade sequence ID" + ) + action: str = Field( + ..., + description="Trade action (buy/sell)" + ) + symbol: str = Field( + ..., + description="Stock symbol" + ) + amount: int = Field( + ..., + description="Number of shares" + ) + price: Optional[float] = Field( + None, + description="Trade price per share" + ) + total: Optional[float] = Field( + None, + description="Total trade value" + ) + + class Config: + json_schema_extra = { + "example": { + "id": 1, + "action": "buy", + "symbol": "AAPL", + "amount": 10, + "price": 255.88, + "total": 2558.80 + } + } + + +class AIReasoning(BaseModel): + """AI reasoning and decision-making summary.""" + + total_steps: int = Field( + ..., + description="Total reasoning steps taken" + ) + stop_signal_received: bool = Field( + ..., + description="Whether AI sent stop signal" + ) + reasoning_summary: str = Field( + ..., + description="Summary of AI reasoning" + ) + tool_usage: Dict[str, int] = Field( + ..., + description="Tool usage counts" + ) + + class Config: + json_schema_extra = { + "example": { + "total_steps": 15, + "stop_signal_received": True, + "reasoning_summary": "Market analysis indicates...", + "tool_usage": { + "search": 3, + "get_price": 5, + "math": 2, + "trade": 1 + } + } + } + + +class ModelResult(BaseModel): + 
"""Simulation results for a single model on a single date.""" + + model: str = Field( + ..., + description="Model signature" + ) + positions: Dict[str, float] = Field( + ..., + description="Current positions (symbol: quantity)" + ) + daily_pnl: DailyPnL = Field( + ..., + description="Daily P&L metrics" + ) + trades: Optional[List[Trade]] = Field( + None, + description="Trades executed (detail=full only)" + ) + ai_reasoning: Optional[AIReasoning] = Field( + None, + description="AI reasoning summary (detail=full only)" + ) + log_file_path: Optional[str] = Field( + None, + description="Path to detailed log file (detail=full only)" + ) + + class Config: + json_schema_extra = { + "example": { + "model": "gpt-5", + "positions": { + "AAPL": 10, + "MSFT": 5, + "CASH": 7500.0 + }, + "daily_pnl": { + "profit": 150.50, + "return_pct": 1.51, + "portfolio_value": 10150.50 + } + } + } + + +# ==================== Response Models ==================== + +class TriggerSimulationResponse(BaseModel): + """Response model for POST /simulate/trigger endpoint.""" + + job_id: str = Field( + ..., + description="Unique job identifier" + ) + status: str = Field( + ..., + description="Job status (accepted/running/current)" + ) + date_range: List[str] = Field( + ..., + description="Dates to be simulated" + ) + models: List[str] = Field( + ..., + description="Models to execute" + ) + created_at: str = Field( + ..., + description="Job creation timestamp (ISO 8601)" + ) + message: str = Field( + ..., + description="Human-readable status message" + ) + progress: Optional[JobProgress] = Field( + None, + description="Progress (if job already running)" + ) + + class Config: + json_schema_extra = { + "example": { + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "accepted", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-5", "claude-3.7-sonnet"], + "created_at": "2025-01-20T14:30:00Z", + "message": "Simulation job queued successfully" + } + } + + +class 
JobStatusResponse(BaseModel): + """Response model for GET /simulate/status/{job_id} endpoint.""" + + job_id: str = Field( + ..., + description="Job identifier" + ) + status: str = Field( + ..., + description="Job status (pending/running/completed/partial/failed)" + ) + date_range: List[str] = Field( + ..., + description="Dates being simulated" + ) + models: List[str] = Field( + ..., + description="Models being executed" + ) + progress: JobProgress = Field( + ..., + description="Execution progress" + ) + created_at: str = Field( + ..., + description="Job creation timestamp" + ) + updated_at: Optional[str] = Field( + None, + description="Last update timestamp" + ) + completed_at: Optional[str] = Field( + None, + description="Job completion timestamp" + ) + total_duration_seconds: Optional[float] = Field( + None, + description="Total execution duration" + ) + + class Config: + json_schema_extra = { + "example": { + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["gpt-5"], + "progress": { + "total_model_days": 2, + "completed": 1, + "failed": 0, + "current": {"date": "2025-01-17", "model": "gpt-5"} + }, + "created_at": "2025-01-20T14:30:00Z" + } + } + + +class ResultsResponse(BaseModel): + """Response model for GET /results endpoint.""" + + date: str = Field( + ..., + description="Trading date" + ) + results: List[ModelResult] = Field( + ..., + description="Results for each model" + ) + + class Config: + json_schema_extra = { + "example": { + "date": "2025-01-16", + "results": [ + { + "model": "gpt-5", + "positions": {"AAPL": 10, "CASH": 7500.0}, + "daily_pnl": { + "profit": 150.50, + "return_pct": 1.51, + "portfolio_value": 10150.50 + } + } + ] + } + } + + +class HealthCheckResponse(BaseModel): + """Response model for GET /health endpoint.""" + + status: str = Field( + ..., + description="Overall health status (healthy/unhealthy)" + ) + timestamp: str = Field( + ..., + 
description="Health check timestamp" + ) + services: Dict[str, Dict] = Field( + ..., + description="Status of each service" + ) + storage: Dict[str, Any] = Field( + ..., + description="Storage status" + ) + database: Dict[str, Any] = Field( + ..., + description="Database status" + ) + + class Config: + json_schema_extra = { + "example": { + "status": "healthy", + "timestamp": "2025-01-20T14:30:00Z", + "services": { + "mcp_math": {"status": "up", "url": "http://localhost:8000/mcp"}, + "mcp_search": {"status": "up", "url": "http://localhost:8001/mcp"} + }, + "storage": { + "data_directory": "/app/data", + "writable": True, + "free_space_mb": 15234 + }, + "database": { + "status": "connected", + "path": "/app/data/jobs.db" + } + } + } + + +class ErrorResponse(BaseModel): + """Standard error response model.""" + + error: str = Field( + ..., + description="Error code/type" + ) + message: str = Field( + ..., + description="Human-readable error message" + ) + details: Optional[Dict] = Field( + None, + description="Additional error details" + ) + + class Config: + json_schema_extra = { + "example": { + "error": "invalid_date", + "message": "Date must be in YYYY-MM-DD format", + "details": {"provided": "2025/01/16"} + } + } diff --git a/api/runtime_manager.py b/api/runtime_manager.py new file mode 100644 index 0000000..d7880ac --- /dev/null +++ b/api/runtime_manager.py @@ -0,0 +1,131 @@ +""" +Runtime configuration manager for isolated model-day execution. + +This module provides: +- Isolated runtime config file creation per model-day +- Prevention of state collisions between concurrent executions +- Automatic cleanup of temporary config files +""" + +import os +import json +from pathlib import Path +import logging + +logger = logging.getLogger(__name__) + + +class RuntimeConfigManager: + """ + Manages isolated runtime configuration files for concurrent model execution. 
+ + Problem: + Multiple models running concurrently need separate runtime_env.json files + to avoid race conditions on TODAY_DATE, SIGNATURE, IF_TRADE values. + + Solution: + Create temporary runtime config file per model-day execution: + - /app/data/runtime_env_{job_id}_{model}_{date}.json + + Lifecycle: + 1. create_runtime_config() โ†’ Creates temp file + 2. Executor sets RUNTIME_ENV_PATH env var + 3. Agent uses isolated config via get_config_value/write_config_value + 4. cleanup_runtime_config() โ†’ Deletes temp file + """ + + def __init__(self, data_dir: str = "data"): + """ + Initialize RuntimeConfigManager. + + Args: + data_dir: Directory for runtime config files (default: "data") + """ + self.data_dir = Path(data_dir) + self.data_dir.mkdir(parents=True, exist_ok=True) + + def create_runtime_config( + self, + job_id: str, + model_sig: str, + date: str + ) -> str: + """ + Create isolated runtime config file for this execution. + + Args: + job_id: Job UUID + model_sig: Model signature + date: Trading date (YYYY-MM-DD) + + Returns: + Path to created runtime config file + + Example: + config_path = manager.create_runtime_config( + "abc123...", + "gpt-5", + "2025-01-16" + ) + # Returns: "data/runtime_env_abc123_gpt-5_2025-01-16.json" + """ + # Generate unique filename (use first 8 chars of job_id for brevity) + job_id_short = job_id[:8] if len(job_id) > 8 else job_id + filename = f"runtime_env_{job_id_short}_{model_sig}_{date}.json" + config_path = self.data_dir / filename + + # Initialize with default values + initial_config = { + "TODAY_DATE": date, + "SIGNATURE": model_sig, + "IF_TRADE": False, + "JOB_ID": job_id + } + + with open(config_path, "w", encoding="utf-8") as f: + json.dump(initial_config, f, indent=4) + + logger.debug(f"Created runtime config: {config_path}") + return str(config_path) + + def cleanup_runtime_config(self, config_path: str) -> None: + """ + Delete runtime config file after execution. 
+ + Args: + config_path: Path to runtime config file + + Note: + Silently ignores if file doesn't exist (already cleaned up) + """ + try: + if os.path.exists(config_path): + os.unlink(config_path) + logger.debug(f"Cleaned up runtime config: {config_path}") + except Exception as e: + logger.warning(f"Failed to cleanup runtime config {config_path}: {e}") + + def cleanup_all_runtime_configs(self) -> int: + """ + Cleanup all runtime config files (for maintenance/startup). + + Returns: + Number of files deleted + + Use case: + - On API startup to clean stale configs from previous runs + - Periodic maintenance + """ + count = 0 + for config_file in self.data_dir.glob("runtime_env_*.json"): + try: + config_file.unlink() + count += 1 + logger.debug(f"Deleted stale runtime config: {config_file}") + except Exception as e: + logger.warning(f"Failed to delete {config_file}: {e}") + + if count > 0: + logger.info(f"Cleaned up {count} stale runtime config files") + + return count diff --git a/api/simulation_worker.py b/api/simulation_worker.py new file mode 100644 index 0000000..580cbf4 --- /dev/null +++ b/api/simulation_worker.py @@ -0,0 +1,210 @@ +""" +Simulation job orchestration worker. + +This module provides: +- Job execution orchestration +- Date-sequential, model-parallel execution +- Progress tracking and status updates +- Error handling and recovery +""" + +import logging +from typing import Dict, Any, List +from concurrent.futures import ThreadPoolExecutor, as_completed + +from api.job_manager import JobManager +from api.model_day_executor import ModelDayExecutor + +logger = logging.getLogger(__name__) + + +class SimulationWorker: + """ + Orchestrates execution of a simulation job. 
+ + Responsibilities: + - Execute all model-day combinations for a job + - Date-sequential execution (one date at a time) + - Model-parallel execution (all models for a date run concurrently) + - Update job status throughout execution + - Handle failures gracefully + + Execution Strategy: + For each date in job.date_range: + Execute all models in parallel using ThreadPoolExecutor + Wait for all models to complete before moving to next date + + Status Transitions: + pending โ†’ running โ†’ completed (all succeeded) + โ†’ partial (some failed) + โ†’ failed (job-level error) + """ + + def __init__(self, job_id: str, db_path: str = "data/jobs.db", max_workers: int = 4): + """ + Initialize SimulationWorker. + + Args: + job_id: Job UUID to execute + db_path: Path to SQLite database + max_workers: Maximum concurrent model executions per date + """ + self.job_id = job_id + self.db_path = db_path + self.max_workers = max_workers + self.job_manager = JobManager(db_path=db_path) + + logger.info(f"Initialized worker for job {job_id}") + + def run(self) -> Dict[str, Any]: + """ + Execute the simulation job. + + Returns: + Result dict with success status and summary + + Process: + 1. Get job details (dates, models, config) + 2. For each date sequentially: + a. Execute all models in parallel + b. Wait for all to complete + c. Update progress + 3. Determine final job status + 4. 
Update job with final status + + Error Handling: + - Individual model failures: Mark detail as failed, continue with others + - Job-level errors: Mark entire job as failed + """ + try: + # Get job info + job = self.job_manager.get_job(self.job_id) + if not job: + raise ValueError(f"Job {self.job_id} not found") + + date_range = job["date_range"] + models = job["models"] + config_path = job["config_path"] + + logger.info(f"Starting job {self.job_id}: {len(date_range)} dates, {len(models)} models") + + # Execute date-by-date (sequential) + for date in date_range: + logger.info(f"Processing date {date} with {len(models)} models") + self._execute_date(date, models, config_path) + + # Job completed - determine final status + progress = self.job_manager.get_job_progress(self.job_id) + + if progress["failed"] == 0: + final_status = "completed" + elif progress["completed"] > 0: + final_status = "partial" + else: + final_status = "failed" + + # Note: Job status is already updated by model_day_executor's detail status updates + # We don't need to explicitly call update_job_status here as it's handled automatically + # by the status transition logic in JobManager.update_job_detail_status + + logger.info(f"Job {self.job_id} finished with status: {final_status}") + + return { + "success": True, + "job_id": self.job_id, + "status": final_status, + "total_model_days": progress["total_model_days"], + "completed": progress["completed"], + "failed": progress["failed"] + } + + except Exception as e: + error_msg = f"Job execution failed: {str(e)}" + logger.error(f"Job {self.job_id}: {error_msg}", exc_info=True) + + # Update job to failed + self.job_manager.update_job_status(self.job_id, "failed", error=error_msg) + + return { + "success": False, + "job_id": self.job_id, + "error": error_msg + } + + def _execute_date(self, date: str, models: List[str], config_path: str) -> None: + """ + Execute all models for a single date in parallel. 
+ + Args: + date: Trading date (YYYY-MM-DD) + models: List of model signatures to execute + config_path: Path to configuration file + + Uses ThreadPoolExecutor to run all models concurrently for this date. + Waits for all models to complete before returning. + """ + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + # Submit all model executions for this date + futures = [] + for model in models: + future = executor.submit( + self._execute_model_day, + date, + model, + config_path + ) + futures.append(future) + + # Wait for all to complete + for future in as_completed(futures): + try: + result = future.result() + if result["success"]: + logger.debug(f"Completed {result['model']} on {result['date']}") + else: + logger.warning(f"Failed {result['model']} on {result['date']}: {result.get('error')}") + except Exception as e: + logger.error(f"Exception in model execution: {e}", exc_info=True) + + def _execute_model_day(self, date: str, model: str, config_path: str) -> Dict[str, Any]: + """ + Execute a single model for a single date. + + Args: + date: Trading date (YYYY-MM-DD) + model: Model signature + config_path: Path to configuration file + + Returns: + Execution result dict + """ + try: + executor = ModelDayExecutor( + job_id=self.job_id, + date=date, + model_sig=model, + config_path=config_path, + db_path=self.db_path + ) + + result = executor.execute() + return result + + except Exception as e: + logger.error(f"Failed to execute {model} on {date}: {e}", exc_info=True) + return { + "success": False, + "job_id": self.job_id, + "date": date, + "model": model, + "error": str(e) + } + + def get_job_info(self) -> Dict[str, Any]: + """ + Get job information. 
+ + Returns: + Job data dict + """ + return self.job_manager.get_job(self.job_id) diff --git a/data/runtime_env_test-job_gpt-5_2025-01-16.json b/data/runtime_env_test-job_gpt-5_2025-01-16.json new file mode 100644 index 0000000..faeb1ca --- /dev/null +++ b/data/runtime_env_test-job_gpt-5_2025-01-16.json @@ -0,0 +1,6 @@ +{ + "TODAY_DATE": "2025-01-16", + "SIGNATURE": "gpt-5", + "IF_TRADE": false, + "JOB_ID": "test-job-123" +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index a784a0f..00f5c88 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,10 @@ services: - ai-trader: + # Batch mode: Run one-time simulations with config file + ai-trader-batch: image: ghcr.io/xe138/ai-trader:latest # Uncomment to build locally instead of pulling: # build: . - container_name: ai-trader-app + container_name: ai-trader-batch volumes: - ${VOLUME_PATH:-.}/data:/app/data - ${VOLUME_PATH:-.}/logs:/app/logs @@ -35,4 +36,58 @@ services: - "${TRADE_HTTP_PORT:-8002}:8002" - "${GETPRICE_HTTP_PORT:-8003}:8003" - "${WEB_HTTP_PORT:-8888}:8888" - restart: on-failure:3 # Restart max 3 times on failure, prevents endless loops + restart: "no" # Batch jobs should not auto-restart + profiles: + - batch # Only start with: docker-compose --profile batch up + + # API mode: REST API server for Windmill integration + ai-trader-api: + image: ghcr.io/xe138/ai-trader:latest + # Uncomment to build locally instead of pulling: + # build: . 
+ container_name: ai-trader-api + entrypoint: ["./entrypoint-api.sh"] + volumes: + - ${VOLUME_PATH:-.}/data:/app/data + - ${VOLUME_PATH:-.}/logs:/app/logs + - ${VOLUME_PATH:-.}/configs:/app/configs + environment: + # AI Model API Configuration + - OPENAI_API_BASE=${OPENAI_API_BASE} + - OPENAI_API_KEY=${OPENAI_API_KEY} + + # Data Source Configuration + - ALPHAADVANTAGE_API_KEY=${ALPHAADVANTAGE_API_KEY} + - JINA_API_KEY=${JINA_API_KEY} + + # System Configuration + - RUNTIME_ENV_PATH=/app/data/runtime_env.json + + # MCP Service Ports (fixed internally) + - MATH_HTTP_PORT=8000 + - SEARCH_HTTP_PORT=8001 + - TRADE_HTTP_PORT=8002 + - GETPRICE_HTTP_PORT=8003 + + # API Configuration + - API_PORT=${API_PORT:-8080} + + # Agent Configuration + - AGENT_MAX_STEP=${AGENT_MAX_STEP:-30} + ports: + # MCP service ports (internal) + - "${MATH_HTTP_PORT:-8000}:8000" + - "${SEARCH_HTTP_PORT:-8001}:8001" + - "${TRADE_HTTP_PORT:-8002}:8002" + - "${GETPRICE_HTTP_PORT:-8003}:8003" + # API server port (primary interface) + - "${API_PORT:-8080}:8080" + # Web dashboard + - "${WEB_HTTP_PORT:-8888}:8888" + restart: unless-stopped # Keep API server running + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s diff --git a/docs/ENHANCED-SPECIFICATIONS-SUMMARY.md b/docs/ENHANCED-SPECIFICATIONS-SUMMARY.md new file mode 100644 index 0000000..7e84497 --- /dev/null +++ b/docs/ENHANCED-SPECIFICATIONS-SUMMARY.md @@ -0,0 +1,631 @@ +# AI-Trader API Service - Enhanced Specifications Summary + +## Changes from Original Specifications + +Based on user feedback, the specifications have been enhanced with: + +1. **SQLite-backed results storage** (instead of reading position.jsonl on-demand) +2. **Comprehensive Python testing suite** with pytest +3. **Defined testing thresholds** for coverage, performance, and quality gates + +--- + +## Document Index + +### Core Specifications (Original) +1. 
**[api-specification.md](./api-specification.md)** - REST API endpoints and data models +2. **[job-manager-specification.md](./job-manager-specification.md)** - Job tracking and database layer +3. **[worker-specification.md](./worker-specification.md)** - Background worker architecture +4. **[implementation-specifications.md](./implementation-specifications.md)** - Agent, Docker, Windmill integration + +### Enhanced Specifications (New) +5. **[database-enhanced-specification.md](./database-enhanced-specification.md)** - SQLite results storage +6. **[testing-specification.md](./testing-specification.md)** - Comprehensive testing suite + +### Summary Documents +7. **[README-SPECS.md](./README-SPECS.md)** - Original specifications overview +8. **[ENHANCED-SPECIFICATIONS-SUMMARY.md](./ENHANCED-SPECIFICATIONS-SUMMARY.md)** - This document + +--- + +## Key Enhancement #1: SQLite Results Storage + +### What Changed + +**Before:** +- `/results` endpoint reads `position.jsonl` files on-demand +- File I/O on every API request +- No support for advanced queries (date ranges, aggregations) + +**After:** +- Simulation results written to SQLite during execution +- Fast database queries (10-100x faster than file I/O) +- Advanced analytics: timeseries, leaderboards, aggregations + +### New Database Tables + +```sql +-- Results storage +CREATE TABLE positions ( + id INTEGER PRIMARY KEY, + job_id TEXT, + date TEXT, + model TEXT, + action_id INTEGER, + action_type TEXT, + symbol TEXT, + amount INTEGER, + price REAL, + cash REAL, + portfolio_value REAL, + daily_profit REAL, + daily_return_pct REAL, + cumulative_profit REAL, + cumulative_return_pct REAL, + created_at TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) +); + +CREATE TABLE holdings ( + id INTEGER PRIMARY KEY, + position_id INTEGER, + symbol TEXT, + quantity INTEGER, + FOREIGN KEY (position_id) REFERENCES positions(id) +); + +CREATE TABLE reasoning_logs ( + id INTEGER PRIMARY KEY, + job_id TEXT, + date TEXT, + model 
TEXT, + step_number INTEGER, + timestamp TEXT, + role TEXT, + content TEXT, + tool_name TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) +); + +CREATE TABLE tool_usage ( + id INTEGER PRIMARY KEY, + job_id TEXT, + date TEXT, + model TEXT, + tool_name TEXT, + call_count INTEGER, + total_duration_seconds REAL, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) +); +``` + +### New API Endpoints + +```python +# Enhanced results endpoint (now reads from SQLite) +GET /results?date=2025-01-16&model=gpt-5&detail=minimal|full + +# New analytics endpoints +GET /portfolio/timeseries?model=gpt-5&start_date=2025-01-01&end_date=2025-01-31 +GET /leaderboard?date=2025-01-16 # Rankings by portfolio value +``` + +### Migration Strategy + +**Phase 1:** Dual-write mode +- Agent writes to `position.jsonl` (existing code) +- Executor writes to SQLite after agent completes +- Ensures backward compatibility + +**Phase 2:** Verification +- Compare SQLite data vs JSONL data +- Fix any discrepancies + +**Phase 3:** Switch over +- `/results` endpoint reads from SQLite +- JSONL writes become optional (can deprecate later) + +### Performance Improvement + +| Operation | Before (JSONL) | After (SQLite) | Speedup | +|-----------|----------------|----------------|---------| +| Get results for 1 date | 200-500ms | 20-50ms | **10x faster** | +| Get timeseries (30 days) | 6-15 seconds | 100-300ms | **50x faster** | +| Get leaderboard | 5-10 seconds | 50-100ms | **100x faster** | + +--- + +## Key Enhancement #2: Comprehensive Testing Suite + +### Testing Thresholds + +| Metric | Minimum | Target | Enforcement | +|--------|---------|--------|-------------| +| **Code Coverage** | 85% | 90% | CI fails if below | +| **Critical Path Coverage** | 90% | 95% | Manual review | +| **Unit Test Speed** | <10s | <5s | Benchmark tracking | +| **Integration Test Speed** | <60s | <30s | Benchmark tracking | +| **API Response Times** | <500ms | <200ms | Load testing | + +### Test Suite Structure + +``` +tests/ 
├── unit/                          # 80 tests, <10 seconds
│   ├── test_job_manager.py        # 95% coverage target
│   ├── test_database.py
│   ├── test_runtime_manager.py
│   ├── test_results_service.py    # 95% coverage target
│   └── test_models.py
│
├── integration/                   # 30 tests, <60 seconds
│   ├── test_api_endpoints.py      # Full FastAPI testing
│   ├── test_worker.py
│   ├── test_executor.py
│   └── test_end_to_end.py
│
├── performance/                   # 20 tests
│   ├── test_database_benchmarks.py
│   ├── test_api_load.py           # Locust load testing
│   └── test_simulation_timing.py
│
├── security/                      # 10 tests
│   ├── test_api_security.py       # SQL injection, XSS, path traversal
│   └── test_auth.py               # Future: API key validation
│
└── e2e/                           # 10 tests, Docker required
    └── test_docker_workflow.py    # Full Docker compose scenario
```

### Quality Gates

**All PRs must pass:**
1. ✅ All tests passing (unit + integration)
2. ✅ Code coverage ≥ 85%
3. ✅ No critical security vulnerabilities (Bandit scan)
4. ✅ Linting passes (Ruff or Flake8)
5. ✅ Type checking passes (mypy strict mode)
6. ✅ No performance regressions (±10% tolerance)

**Release checklist:**
1. ✅ All quality gates pass
2. ✅ End-to-end tests pass in Docker
3. ✅ Load testing passes (100 concurrent requests)
4. ✅ Security scan passes (OWASP ZAP)
5.
โœ… Manual smoke tests complete + +### CI/CD Integration + +```yaml +# .github/workflows/test.yml +name: Test Suite + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Run unit tests + run: pytest tests/unit/ --cov=api --cov-fail-under=85 + - name: Run integration tests + run: pytest tests/integration/ + - name: Security scan + run: bandit -r api/ -ll + - name: Upload coverage + uses: codecov/codecov-action@v3 +``` + +### Test Coverage Breakdown + +| Component | Minimum | Target | Tests | +|-----------|---------|--------|-------| +| `api/job_manager.py` | 90% | 95% | 25 tests | +| `api/worker.py` | 85% | 90% | 15 tests | +| `api/executor.py` | 85% | 90% | 12 tests | +| `api/results_service.py` | 90% | 95% | 18 tests | +| `api/database.py` | 95% | 100% | 10 tests | +| `api/runtime_manager.py` | 85% | 90% | 8 tests | +| `api/main.py` | 80% | 85% | 20 tests | +| **Total** | **85%** | **90%** | **~150 tests** | + +--- + +## Updated Implementation Plan + +### Phase 1: API Foundation (Days 1-2) +- [x] Create `api/` directory structure +- [ ] Implement `api/models.py` with Pydantic models +- [ ] Implement `api/database.py` with **enhanced schema** (6 tables) +- [ ] Implement `api/job_manager.py` with job CRUD operations +- [ ] **NEW:** Write unit tests for job_manager (target: 95% coverage) +- [ ] Test database operations manually + +**Testing Deliverables:** +- 25 unit tests for job_manager +- 10 unit tests for database utilities +- 85%+ coverage for Phase 1 code + +--- + +### Phase 2: Worker & Executor (Days 3-4) +- [ ] Implement `api/runtime_manager.py` +- [ ] Implement `api/executor.py` for single model-day execution +- [ ] **NEW:** Add SQLite write logic to executor (`_store_results_to_db()`) +- [ ] Implement `api/worker.py` for job orchestration +- [ ] **NEW:** Write unit tests for worker and executor (target: 85% coverage) +- [ ] Test runtime config isolation + +**Testing Deliverables:** +- 15 unit 
tests for worker +- 12 unit tests for executor +- 8 unit tests for runtime_manager +- 85%+ coverage for Phase 2 code + +--- + +### Phase 3: Results Service & FastAPI Endpoints (Days 5-6) +- [ ] **NEW:** Implement `api/results_service.py` (SQLite-backed) + - [ ] `get_results(date, model, detail)` + - [ ] `get_portfolio_timeseries(model, start_date, end_date)` + - [ ] `get_leaderboard(date)` +- [ ] Implement `api/main.py` with all endpoints + - [ ] `/simulate/trigger` with background tasks + - [ ] `/simulate/status/{job_id}` + - [ ] `/simulate/current` + - [ ] `/results` (now reads from SQLite) + - [ ] **NEW:** `/portfolio/timeseries` + - [ ] **NEW:** `/leaderboard` + - [ ] `/health` with MCP checks +- [ ] **NEW:** Write unit tests for results_service (target: 95% coverage) +- [ ] **NEW:** Write integration tests for API endpoints (target: 80% coverage) +- [ ] Test all endpoints with Postman/curl + +**Testing Deliverables:** +- 18 unit tests for results_service +- 20 integration tests for API endpoints +- Performance benchmarks for database queries +- 85%+ coverage for Phase 3 code + +--- + +### Phase 4: Docker Integration (Day 7) +- [ ] Update `Dockerfile` +- [ ] Create `docker-entrypoint-api.sh` +- [ ] Create `requirements-api.txt` +- [ ] Update `docker-compose.yml` +- [ ] Test Docker build +- [ ] Test container startup and health checks +- [ ] **NEW:** Run E2E tests in Docker environment +- [ ] Test end-to-end simulation via API in Docker + +**Testing Deliverables:** +- 10 E2E tests with Docker +- Docker health check validation +- Performance testing in containerized environment + +--- + +### Phase 5: Windmill Integration (Days 8-9) +- [ ] Create Windmill scripts (trigger, poll, store) +- [ ] **UPDATED:** Modify `store_simulation_results.py` to use new `/results` endpoint +- [ ] Test scripts locally against Docker API +- [ ] Deploy scripts to Windmill instance +- [ ] Create Windmill workflow +- [ ] Test workflow end-to-end +- [ ] Create Windmill dashboard (using 
new `/portfolio/timeseries` and `/leaderboard` endpoints) +- [ ] Document Windmill setup process + +**Testing Deliverables:** +- Integration tests for Windmill scripts +- End-to-end workflow validation +- Dashboard functionality verification + +--- + +### Phase 6: Testing, Security & Documentation (Day 10) +- [ ] **NEW:** Run full test suite and verify all thresholds met + - [ ] Code coverage โ‰ฅ 85% + - [ ] All ~150 tests passing + - [ ] Performance benchmarks within limits +- [ ] **NEW:** Security testing + - [ ] Bandit scan (Python security issues) + - [ ] SQL injection tests + - [ ] Input validation tests + - [ ] OWASP ZAP scan (optional) +- [ ] **NEW:** Load testing with Locust + - [ ] 100 concurrent users + - [ ] API endpoints within performance thresholds +- [ ] Integration tests for complete workflow +- [ ] Update README.md with API usage +- [ ] Create API documentation (Swagger/OpenAPI - auto-generated by FastAPI) +- [ ] Create deployment guide +- [ ] Create troubleshooting guide +- [ ] **NEW:** Generate test coverage report + +**Testing Deliverables:** +- Full test suite execution report +- Security scan results +- Load testing results +- Coverage report (HTML + XML) +- CI/CD pipeline configuration + +--- + +## New Files Created + +### Database & Results +- `api/results_service.py` - SQLite-backed results retrieval +- `api/import_historical_data.py` - Migration script for existing position.jsonl files + +### Testing Suite +- `tests/conftest.py` - Shared pytest fixtures +- `tests/unit/test_job_manager.py` - 25 tests +- `tests/unit/test_database.py` - 10 tests +- `tests/unit/test_runtime_manager.py` - 8 tests +- `tests/unit/test_results_service.py` - 18 tests +- `tests/unit/test_models.py` - 5 tests +- `tests/integration/test_api_endpoints.py` - 20 tests +- `tests/integration/test_worker.py` - 15 tests +- `tests/integration/test_executor.py` - 12 tests +- `tests/integration/test_end_to_end.py` - 5 tests +- `tests/performance/test_database_benchmarks.py` - 
10 tests
+- `tests/performance/test_api_load.py` - Locust load testing
+- `tests/security/test_api_security.py` - 10 tests
+- `tests/e2e/test_docker_workflow.py` - 10 tests
+- `pytest.ini` - Test configuration
+- `requirements-dev.txt` - Testing dependencies
+
+### CI/CD
+- `.github/workflows/test.yml` - GitHub Actions workflow
+
+---
+
+## Updated File Structure
+
+```
+AI-Trader/
+├── api/
+│ ├── __init__.py
+│ ├── main.py # FastAPI application
+│ ├── models.py # Pydantic request/response models
+│ ├── job_manager.py # Job lifecycle management
+│ ├── database.py # SQLite utilities (enhanced schema)
+│ ├── worker.py # Background simulation worker
+│ ├── executor.py # Single model-day execution (+ SQLite writes)
+│ ├── runtime_manager.py # Runtime config isolation
+│ ├── results_service.py # NEW: SQLite-backed results retrieval
+│ └── import_historical_data.py # NEW: JSONL → SQLite migration
+│
+├── tests/ # NEW: Comprehensive test suite
+│ ├── conftest.py
+│ ├── unit/ # 80 tests, <10s
+│ ├── integration/ # 30 tests, <60s
+│ ├── performance/ # 20 tests
+│ ├── security/ # 10 tests
+│ └── e2e/ # 10 tests
+│
+├── docs/
+│ ├── api-specification.md
+│ ├── job-manager-specification.md
+│ ├── worker-specification.md
+│ ├── implementation-specifications.md
+│ ├── database-enhanced-specification.md # NEW
+│ ├── testing-specification.md # NEW
+│ ├── README-SPECS.md
+│ └── ENHANCED-SPECIFICATIONS-SUMMARY.md # NEW (this file)
+│
+├── data/
+│ ├── jobs.db # SQLite database (6 tables)
+│ ├── runtime_env*.json # Runtime configs (temporary)
+│ ├── agent_data/ # Existing position/log data
+│ └── merged.jsonl # Existing price data
+│
+├── pytest.ini # NEW: Test configuration
+├── requirements-dev.txt # NEW: Testing 
dependencies +โ”œโ”€โ”€ .github/workflows/test.yml # NEW: CI/CD pipeline +โ””โ”€โ”€ ... (existing files) +``` + +--- + +## Benefits Summary + +### Performance +- **10-100x faster** results queries (SQLite vs file I/O) +- **Advanced analytics** - timeseries, leaderboards, aggregations in milliseconds +- **Optimized indexes** for common queries + +### Quality +- **85% minimum coverage** enforced by CI/CD +- **150 comprehensive tests** across unit, integration, performance, security +- **Quality gates** prevent regressions +- **Type safety** with mypy strict mode + +### Maintainability +- **SQLite single source of truth** - easier backup, restore, migration +- **Automated testing** catches bugs early +- **CI/CD integration** provides fast feedback on every commit +- **Security scanning** prevents vulnerabilities + +### Analytics Capabilities + +**New queries enabled by SQLite:** + +```python +# Portfolio timeseries for charting +GET /portfolio/timeseries?model=gpt-5&start_date=2025-01-01&end_date=2025-01-31 + +# Model leaderboard +GET /leaderboard?date=2025-01-31 + +# Advanced filtering (future) +SELECT * FROM positions +WHERE daily_return_pct > 2.0 +ORDER BY portfolio_value DESC; + +# Aggregations (future) +SELECT model, AVG(daily_return_pct) as avg_return +FROM positions +GROUP BY model +ORDER BY avg_return DESC; +``` + +--- + +## Migration from Original Spec + +If you've already started implementation based on original specs: + +### Step 1: Database Schema Migration +```sql +-- Run enhanced schema creation +-- See database-enhanced-specification.md Section 2.1 +``` + +### Step 2: Add Results Service +```bash +# Create new file +touch api/results_service.py +# Implement as per database-enhanced-specification.md Section 4.1 +``` + +### Step 3: Update Executor +```python +# In api/executor.py, add after agent.run_trading_session(): +self._store_results_to_db(job_id, date, model_sig) +``` + +### Step 4: Update API Endpoints +```python +# In api/main.py, update /results 
endpoint to use ResultsService
+from api.results_service import ResultsService
+results_service = ResultsService()
+
+@app.get("/results")
+async def get_results(...):
+    return results_service.get_results(date, model, detail)
+```
+
+### Step 5: Add Test Suite
+```bash
+mkdir -p tests/{unit,integration,performance,security,e2e}
+# Create test files as per testing-specification.md Section 4-8
+```
+
+### Step 6: Configure CI/CD
+```bash
+mkdir -p .github/workflows
+# Create test.yml as per testing-specification.md Section 10.1
+```
+
+---
+
+## Testing Execution Guide
+
+### Run Unit Tests
+```bash
+pytest tests/unit/ -v --cov=api --cov-report=term-missing
+```
+
+### Run Integration Tests
+```bash
+pytest tests/integration/ -v
+```
+
+### Run All Tests (Except E2E)
+```bash
+pytest tests/ -v --ignore=tests/e2e/ --cov=api --cov-report=html
+```
+
+### Run E2E Tests (Requires Docker)
+```bash
+pytest tests/e2e/ -v -s
+```
+
+### Run Performance Benchmarks
+```bash
+pytest tests/performance/ --benchmark-only
+```
+
+### Run Security Tests
+```bash
+pytest tests/security/ -v
+bandit -r api/ -ll
+```
+
+### Generate Coverage Report
+```bash
+pytest tests/unit/ tests/integration/ --cov=api --cov-report=html
+open htmlcov/index.html # View in browser
+```
+
+### Run Load Tests
+```bash
+locust -f tests/performance/test_api_load.py --host=http://localhost:8080
+# Open http://localhost:8089 for Locust UI
+```
+
+---
+
+## Questions & Next Steps
+
+### Review Checklist
+
+Please review:
+1. ✅ **Enhanced database schema** with 6 tables for comprehensive results storage
+2. ✅ **Migration strategy** for backward compatibility (dual-write mode)
+3. ✅ **Testing thresholds** (85% coverage minimum, performance benchmarks)
+4. ✅ **Test suite structure** (150 tests across 5 categories)
+5. ✅ **CI/CD integration** with quality gates
+6. ✅ **Updated implementation plan** (10 days, 6 phases)
+
+### Questions to Consider
+
+1. 
**Database migration timing:** Start with dual-write mode immediately, or add in Phase 2?
+2. **Testing priorities:** Should we implement tests alongside features (TDD) or after each phase?
+3. **CI/CD platform:** GitHub Actions (as specified) or different platform?
+4. **Performance baselines:** Should we run benchmarks before implementation to track improvement?
+5. **Security priorities:** Which security tests are MVP vs nice-to-have?
+
+### Ready to Implement?
+
+**Option A:** Approve specifications and begin Phase 1 implementation
+- Create API directory structure
+- Implement enhanced database schema
+- Write unit tests for database layer
+- Target: 2 days, 90%+ coverage for database code
+
+**Option B:** Request modifications to specifications
+- Clarify any unclear requirements
+- Adjust testing thresholds
+- Modify implementation timeline
+
+**Option C:** Implement in parallel workstreams
+- Workstream 1: Core API (Phases 1-3)
+- Workstream 2: Testing suite (parallel with Phase 1-3)
+- Workstream 3: Docker + Windmill (Phases 4-5)
+- Benefits: Faster delivery, more parallelization
+- Requires: Clear interfaces between components
+
+---
+
+## Summary
+
+**Enhanced specifications** add:
+1. 🗄️ **SQLite results storage** - 10-100x faster queries, advanced analytics
+2. 🧪 **Comprehensive testing** - 150 tests, 85% coverage, quality gates
+3. 🔒 **Security testing** - SQL injection, XSS, input validation
+4. ⚡ **Performance benchmarks** - Catch regressions early
+5. 🚀 **CI/CD pipeline** - Automated quality checks on every commit
+
+**Total effort:** Still ~10 days, but with significantly higher code quality and confidence in deployments.
+
+**Risk mitigation:** Extensive testing catches bugs before production, preventing costly hotfixes.
+
+**Long-term value:** Maintainable, well-tested codebase enables rapid feature development.
+
+---
+
+Ready to proceed? Please provide feedback or approval to begin implementation! 
diff --git a/docs/README-SPECS.md b/docs/README-SPECS.md new file mode 100644 index 0000000..c9fa584 --- /dev/null +++ b/docs/README-SPECS.md @@ -0,0 +1,436 @@ +# AI-Trader API Service - Technical Specifications Summary + +## Overview + +This directory contains comprehensive technical specifications for transforming the AI-Trader batch simulation system into an API service compatible with Windmill automation. + +## Specification Documents + +### 1. [API Specification](./api-specification.md) +**Purpose:** Defines all API endpoints, request/response formats, and data models + +**Key Contents:** +- **5 REST Endpoints:** + - `POST /simulate/trigger` - Queue catch-up simulation job + - `GET /simulate/status/{job_id}` - Poll job progress + - `GET /simulate/current` - Get latest job + - `GET /results` - Retrieve simulation results (minimal/full detail) + - `GET /health` - Service health check +- **Pydantic Models** for type-safe request/response handling +- **Error Handling** strategies and HTTP status codes +- **SQLite Schema** for jobs and job_details tables +- **Configuration Management** via environment variables + +**Status Codes:** 200 OK, 202 Accepted, 400 Bad Request, 404 Not Found, 409 Conflict, 503 Service Unavailable + +--- + +### 2. 
[Job Manager Specification](./job-manager-specification.md)
+**Purpose:** Details the job tracking and database layer
+
+**Key Contents:**
+- **SQLite Database Schema:**
+  - `jobs` table - High-level job metadata
+  - `job_details` table - Per model-day execution tracking
+- **JobManager Class Interface:**
+  - `create_job()` - Create new simulation job
+  - `get_job()` - Retrieve job by ID
+  - `update_job_status()` - State transitions (pending → running → completed/partial/failed)
+  - `get_job_progress()` - Detailed progress metrics
+  - `can_start_new_job()` - Concurrency control
+- **State Machine:** Job status transitions and business logic
+- **Concurrency Control:** Single-job execution enforcement
+- **Testing Strategy:** Unit tests with temporary databases
+
+**Key Feature:** Independent model execution - one model's failure doesn't block others (results in "partial" status)
+
+---
+
+### 3. [Background Worker Specification](./worker-specification.md)
+**Purpose:** Defines async job execution architecture
+
+**Key Contents:**
+- **Execution Pattern:** Date-sequential, Model-parallel
+  - All models for Date 1 run in parallel
+  - Date 2 starts only after all models finish Date 1
+  - Ensures position.jsonl integrity (no concurrent writes)
+- **SimulationWorker Class:**
+  - Orchestrates job execution
+  - Manages date sequencing
+  - Handles job-level errors
+- **ModelDayExecutor Class:**
+  - Executes single model-day simulation
+  - Updates job_detail status
+  - Isolates runtime configuration
+- **RuntimeConfigManager:**
+  - Creates temporary runtime_env_{job_id}_{model}_{date}.json files
+  - Prevents state collisions between concurrent models
+  - Cleans up after execution
+- **Error Handling:** Graceful failure (models continue despite peer failures)
+- **Logging:** Structured JSON logging with job/model/date context
+
+**Performance:** 3 models × 5 days = ~7-15 minutes (vs. ~22-45 minutes sequential)
+
+---
+
+### 4. 
[Implementation Specification](./implementation-specifications.md)
+**Purpose:** Complete implementation guide covering Agent, Docker, and Windmill
+
+**Key Contents:**
+
+#### Part 1: BaseAgent Refactoring
+- **Analysis:** Existing `run_trading_session()` already compatible with API mode
+- **Required Changes:** ✅ NONE! Existing code works as-is
+- **Worker Integration:** Calls `agent.run_trading_session(date)` directly
+
+#### Part 2: Docker Configuration
+- **Modified Dockerfile:** Adds FastAPI dependencies, new entrypoint
+- **docker-entrypoint-api.sh:** Starts MCP services → launches uvicorn
+- **Health Checks:** Verifies MCP services and database connectivity
+- **Volume Mounts:** `./data`, `./configs` for persistence
+
+#### Part 3: Windmill Integration
+- **Flow 1: trigger_simulation.ts** - Daily cron triggers API
+- **Flow 2: poll_simulation_status.ts** - Polls every 5 min until complete
+- **Flow 3: store_simulation_results.py** - Stores results in Windmill DB
+- **Dashboard:** Charts and tables showing portfolio performance
+- **Workflow Orchestration:** Complete YAML workflow definition
+
+#### Part 4: File Structure
+- New `api/` directory with 7 modules
+- New `windmill/` directory with scripts and dashboard
+- New `docs/` directory (this folder)
+- `data/jobs.db` for job tracking
+
+#### Part 5: Implementation Checklist
+10-day implementation plan broken into 6 phases
+
+---
+
+## Architecture Highlights
+
+### Request Flow
+
+```
+1. Windmill → POST /simulate/trigger
+2. API creates job in SQLite (status: pending)
+3. API queues BackgroundTask
+4. API returns 202 Accepted with job_id
+   ↓
+5. Worker starts (status: running)
+6. For each date sequentially:
+   For each model in parallel:
+   - Create isolated runtime config
+   - Execute agent.run_trading_session(date)
+   - Update job_detail status
+7. Worker finishes (status: completed/partial/failed)
+   ↓
+8. Windmill polls GET /simulate/status/{job_id}
+9. 
When complete: Windmill calls GET /results?date=X
+10. Windmill stores results in internal DB
+11. Windmill dashboard displays performance
+```
+
+### Data Flow
+
+```
+Input: configs/default_config.json
+   ↓
+API: Calculates date_range (last position → today)
+   ↓
+Worker: Executes simulations
+   ↓
+Output: data/agent_data/{model}/position/position.jsonl
+        data/agent_data/{model}/log/{date}/log.jsonl
+        data/jobs.db (job tracking)
+   ↓
+API: Reads position.jsonl + calculates P&L
+   ↓
+Windmill: Stores in internal DB → Dashboard visualization
+```
+
+---
+
+## Key Design Decisions
+
+### 1. Pattern B: Lazy On-Demand Processing
+- **Chosen:** Windmill controls simulation timing via API calls
+- **Benefit:** Centralized scheduling in Windmill
+- **Tradeoff:** First Windmill call of the day triggers long-running job
+
+### 2. SQLite vs. PostgreSQL
+- **Chosen:** SQLite for MVP
+- **Rationale:** Low concurrency (1 job at a time), simple deployment
+- **Future:** PostgreSQL for production with multiple concurrent jobs
+
+### 3. Date-Sequential, Model-Parallel Execution
+- **Chosen:** Dates run sequentially, models run in parallel per date
+- **Rationale:** Prevents position.jsonl race conditions, faster than fully sequential
+- **Performance:** ~50% faster than sequential (3 models in parallel)
+
+### 4. Independent Model Failures
+- **Chosen:** One model's failure doesn't block others
+- **Benefit:** Partial results better than no results
+- **Implementation:** Job status becomes "partial" if any model fails
+
+### 5. Minimal BaseAgent Changes
+- **Chosen:** No modifications to agent code
+- **Rationale:** Existing `run_trading_session()` is perfect API interface
+- **Benefit:** Maintains backward compatibility with batch mode
+
+---
+
+## Implementation Prerequisites
+
+### Required Environment Variables
+```bash
+OPENAI_API_BASE=...
+OPENAI_API_KEY=...
+ALPHAADVANTAGE_API_KEY=...
+JINA_API_KEY=... 
+RUNTIME_ENV_PATH=/app/data/runtime_env.json
+MATH_HTTP_PORT=8000
+SEARCH_HTTP_PORT=8001
+TRADE_HTTP_PORT=8002
+GETPRICE_HTTP_PORT=8003
+API_HOST=0.0.0.0
+API_PORT=8080
+```
+
+### Required Python Packages (new)
+```
+fastapi>=0.120.0
+uvicorn[standard]>=0.27.0
+pydantic>=2.0.0
+```
+
+### Docker Requirements
+- Docker Engine 20.10+
+- Docker Compose 2.0+
+- 2GB RAM minimum for container
+- 10GB disk space for data
+
+### Windmill Requirements
+- Windmill instance (self-hosted or cloud)
+- Network access from Windmill to AI-Trader API
+- Windmill CLI for deployment (optional)
+
+---
+
+## Testing Strategy
+
+### Unit Tests
+- `tests/test_job_manager.py` - Database operations
+- `tests/test_worker.py` - Job execution logic
+- `tests/test_executor.py` - Model-day execution
+
+### Integration Tests
+- `tests/test_api_endpoints.py` - FastAPI endpoint behavior
+- `tests/test_end_to_end.py` - Full workflow (trigger → execute → retrieve)
+
+### Manual Testing
+- Docker container startup
+- Health check endpoint
+- Windmill workflow execution
+- Dashboard visualization
+
+---
+
+## Performance Expectations
+
+### Single Model-Day Execution
+- **Duration:** 30-60 seconds (varies by AI model latency)
+- **Bottlenecks:** AI API calls, MCP tool latency
+
+### Multi-Model Job
+- **Example:** 3 models × 5 days = 15 model-days
+- **Parallel Execution:** ~7-15 minutes
+- **Sequential Execution:** ~22-45 minutes
+- **Speedup:** ~3x (number of models)
+
+### API Response Times
+- `/simulate/trigger`: < 1 second (just queues job)
+- `/simulate/status`: < 100ms (SQLite query)
+- `/results?detail=minimal`: < 500ms (file read + JSON parsing)
+- `/results?detail=full`: < 2 seconds (parse log files)
+
+---
+
+## Security Considerations
+
+### MVP Security
+- **Network Isolation:** Docker network (no public exposure)
+- **No Authentication:** Assumes Windmill → API is trusted network
+
+### Future Enhancements
+- API key authentication (`X-API-Key` header)
+- Rate limiting per 
client +- HTTPS/TLS encryption +- Input sanitization for path traversal prevention + +--- + +## Deployment Steps + +### 1. Build Docker Image +```bash +docker-compose build +``` + +### 2. Start API Service +```bash +docker-compose up -d +``` + +### 3. Verify Health +```bash +curl http://localhost:8080/health +``` + +### 4. Test Trigger +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{"config_path": "configs/default_config.json"}' +``` + +### 5. Deploy Windmill Scripts +```bash +wmill script push windmill/trigger_simulation.ts +wmill script push windmill/poll_simulation_status.ts +wmill script push windmill/store_simulation_results.py +``` + +### 6. Create Windmill Workflow +- Import `windmill/daily_simulation_workflow.yaml` +- Configure resource `ai_trader_api` with API URL +- Set cron schedule (daily 6 AM) + +### 7. Create Windmill Dashboard +- Import `windmill/dashboard.json` +- Verify data visualization + +--- + +## Troubleshooting Guide + +### Issue: Health check fails +**Symptoms:** `curl http://localhost:8080/health` returns 503 + +**Possible Causes:** +1. MCP services not running +2. Database file permission error +3. API server not started + +**Solutions:** +```bash +# Check MCP services +docker-compose exec ai-trader curl http://localhost:8000/health + +# Check API logs +docker-compose logs -f ai-trader + +# Restart container +docker-compose restart +``` + +### Issue: Job stuck in "running" status +**Symptoms:** Job never completes, status remains "running" + +**Possible Causes:** +1. Agent execution crashed +2. Model API timeout +3. 
Worker process died + +**Solutions:** +```bash +# Check job details for error messages +curl http://localhost:8080/simulate/status/{job_id} + +# Check container logs +docker-compose logs -f ai-trader + +# If API restarted, stale jobs are marked as failed on startup +docker-compose restart +``` + +### Issue: Windmill can't reach API +**Symptoms:** Connection refused from Windmill scripts + +**Solutions:** +- Verify Windmill and AI-Trader on same Docker network +- Check firewall rules +- Use container name (ai-trader) instead of localhost in Windmill resource +- Verify API_PORT environment variable + +--- + +## Migration from Batch Mode + +### For Users Currently Running Batch Mode + +**Option 1: Dual Mode (Recommended)** +- Keep existing `main.py` for manual testing +- Add new API mode for production automation +- Use different config files for each mode + +**Option 2: API-Only** +- Replace batch execution entirely +- All simulations via API calls +- More consistent with production workflow + +### Migration Checklist +- [ ] Backup existing `data/` directory +- [ ] Update `.env` with API configuration +- [ ] Test API mode in separate environment first +- [ ] Gradually migrate Windmill workflows +- [ ] Monitor logs for errors +- [ ] Validate results match batch mode output + +--- + +## Next Steps + +1. **Review Specifications** + - Read all 4 specification documents + - Ask clarifying questions + - Approve design before implementation + +2. **Implementation Phase 1** (Days 1-2) + - Set up `api/` directory structure + - Implement database and job_manager + - Write unit tests + +3. **Implementation Phase 2** (Days 3-4) + - Implement worker and executor + - Test with mock agents + +4. **Implementation Phase 3** (Days 5-6) + - Implement FastAPI endpoints + - Test with Postman/curl + +5. **Implementation Phase 4** (Day 7) + - Docker integration + - End-to-end testing + +6. **Implementation Phase 5** (Days 8-9) + - Windmill integration + - Dashboard creation + +7. 
**Implementation Phase 6** (Day 10) + - Final testing + - Documentation + +--- + +## Questions or Feedback? + +Please review all specifications and provide feedback on: +1. API endpoint design +2. Database schema +3. Execution pattern (date-sequential, model-parallel) +4. Error handling approach +5. Windmill integration workflow +6. Any concerns or suggested improvements + +**Ready to proceed with implementation?** Confirm approval of specifications to begin Phase 1. diff --git a/docs/api-specification.md b/docs/api-specification.md new file mode 100644 index 0000000..73a8acc --- /dev/null +++ b/docs/api-specification.md @@ -0,0 +1,837 @@ +# AI-Trader API Service - Technical Specification + +## 1. API Endpoints Specification + +### 1.1 POST /simulate/trigger + +**Purpose:** Trigger a catch-up simulation from the last completed date to the most recent trading day. + +**Request:** +```http +POST /simulate/trigger HTTP/1.1 +Content-Type: application/json + +{ + "config_path": "configs/default_config.json" // Optional: defaults to configs/default_config.json +} +``` + +**Response (202 Accepted):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "accepted", + "date_range": ["2025-01-16", "2025-01-17", "2025-01-20"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "created_at": "2025-01-20T14:30:00Z", + "message": "Simulation job queued successfully" +} +``` + +**Response (200 OK - Job Already Running):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "date_range": ["2025-01-16", "2025-01-17", "2025-01-20"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "progress": { + "total_model_days": 6, + "completed": 3, + "failed": 0, + "current": { + "date": "2025-01-17", + "model": "gpt-5" + } + }, + "created_at": "2025-01-20T14:25:00Z", + "message": "Simulation already in progress" +} +``` + +**Response (200 OK - Already Up To Date):** +```json +{ + "status": "current", + "message": "Simulation already 
up-to-date", + "last_simulation_date": "2025-01-20", + "next_trading_day": "2025-01-21" +} +``` + +**Response (409 Conflict):** +```json +{ + "error": "conflict", + "message": "Different simulation already running", + "current_job_id": "previous-job-uuid", + "current_date_range": ["2025-01-10", "2025-01-15"] +} +``` + +**Business Logic:** +1. Load configuration from `config_path` (or default) +2. Determine last completed date from each model's `position.jsonl` +3. Calculate date range: `max(last_dates) + 1 day` โ†’ `most_recent_trading_day` +4. Filter for weekdays only (Monday-Friday) +5. If date_range is empty, return "already up-to-date" +6. Check for existing jobs with same date range โ†’ return existing job +7. Check for running jobs with different date range โ†’ return 409 +8. Create new job in SQLite with status=`pending` +9. Queue background task to execute simulation +10. Return 202 with job details + +--- + +### 1.2 GET /simulate/status/{job_id} + +**Purpose:** Poll the status and progress of a simulation job. 
+ +**Request:** +```http +GET /simulate/status/550e8400-e29b-41d4-a716-446655440000 HTTP/1.1 +``` + +**Response (200 OK - Running):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "date_range": ["2025-01-16", "2025-01-17", "2025-01-20"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "progress": { + "total_model_days": 6, + "completed": 3, + "failed": 0, + "current": { + "date": "2025-01-17", + "model": "gpt-5" + }, + "details": [ + {"date": "2025-01-16", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 45.2}, + {"date": "2025-01-16", "model": "gpt-5", "status": "completed", "duration_seconds": 38.7}, + {"date": "2025-01-17", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 42.1}, + {"date": "2025-01-17", "model": "gpt-5", "status": "running", "duration_seconds": null} + ] + }, + "created_at": "2025-01-20T14:25:00Z", + "updated_at": "2025-01-20T14:27:15Z" +} +``` + +**Response (200 OK - Completed):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "completed", + "date_range": ["2025-01-16", "2025-01-17", "2025-01-20"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "progress": { + "total_model_days": 6, + "completed": 6, + "failed": 0, + "details": [ + {"date": "2025-01-16", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 45.2}, + {"date": "2025-01-16", "model": "gpt-5", "status": "completed", "duration_seconds": 38.7}, + {"date": "2025-01-17", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 42.1}, + {"date": "2025-01-17", "model": "gpt-5", "status": "completed", "duration_seconds": 40.3}, + {"date": "2025-01-20", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 43.8}, + {"date": "2025-01-20", "model": "gpt-5", "status": "completed", "duration_seconds": 39.1} + ] + }, + "created_at": "2025-01-20T14:25:00Z", + "completed_at": "2025-01-20T14:29:45Z", + "total_duration_seconds": 
285.0 +} +``` + +**Response (200 OK - Partial Failure):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "partial", + "date_range": ["2025-01-16", "2025-01-17", "2025-01-20"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "progress": { + "total_model_days": 6, + "completed": 4, + "failed": 2, + "details": [ + {"date": "2025-01-16", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 45.2}, + {"date": "2025-01-16", "model": "gpt-5", "status": "completed", "duration_seconds": 38.7}, + {"date": "2025-01-17", "model": "claude-3.7-sonnet", "status": "failed", "error": "MCP service timeout after 3 retries", "duration_seconds": null}, + {"date": "2025-01-17", "model": "gpt-5", "status": "completed", "duration_seconds": 40.3}, + {"date": "2025-01-20", "model": "claude-3.7-sonnet", "status": "completed", "duration_seconds": 43.8}, + {"date": "2025-01-20", "model": "gpt-5", "status": "failed", "error": "AI model API timeout", "duration_seconds": null} + ] + }, + "created_at": "2025-01-20T14:25:00Z", + "completed_at": "2025-01-20T14:29:45Z" +} +``` + +**Response (404 Not Found):** +```json +{ + "error": "not_found", + "message": "Job not found", + "job_id": "invalid-job-id" +} +``` + +**Business Logic:** +1. Query SQLite jobs table for job_id +2. If not found, return 404 +3. Return job metadata + progress from job_details table +4. Status transitions: `pending` โ†’ `running` โ†’ `completed`/`partial`/`failed` + +--- + +### 1.3 GET /simulate/current + +**Purpose:** Get the most recent simulation job (for Windmill to discover job_id). 
+ +**Request:** +```http +GET /simulate/current HTTP/1.1 +``` + +**Response (200 OK):** +```json +{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "date_range": ["2025-01-16", "2025-01-17"], + "models": ["claude-3.7-sonnet", "gpt-5"], + "progress": { + "total_model_days": 4, + "completed": 2, + "failed": 0 + }, + "created_at": "2025-01-20T14:25:00Z" +} +``` + +**Response (404 Not Found):** +```json +{ + "error": "not_found", + "message": "No simulation jobs found" +} +``` + +**Business Logic:** +1. Query SQLite: `SELECT * FROM jobs ORDER BY created_at DESC LIMIT 1` +2. Return job details with progress summary + +--- + +### 1.4 GET /results + +**Purpose:** Retrieve simulation results for a specific date and model. + +**Request:** +```http +GET /results?date=2025-01-15&model=gpt-5&detail=minimal HTTP/1.1 +``` + +**Query Parameters:** +- `date` (required): Trading date in YYYY-MM-DD format +- `model` (optional): Model signature (if omitted, returns all models) +- `detail` (optional): Response detail level + - `minimal` (default): Positions + daily P&L + - `full`: + trade history + AI reasoning logs + tool usage stats + +**Response (200 OK - minimal):** +```json +{ + "date": "2025-01-15", + "results": [ + { + "model": "gpt-5", + "positions": { + "AAPL": 10, + "MSFT": 5, + "NVDA": 0, + "CASH": 8500.00 + }, + "daily_pnl": { + "profit": 150.50, + "return_pct": 1.5, + "portfolio_value": 10150.50 + } + } + ] +} +``` + +**Response (200 OK - full):** +```json +{ + "date": "2025-01-15", + "results": [ + { + "model": "gpt-5", + "positions": { + "AAPL": 10, + "MSFT": 5, + "CASH": 8500.00 + }, + "daily_pnl": { + "profit": 150.50, + "return_pct": 1.5, + "portfolio_value": 10150.50 + }, + "trades": [ + { + "id": 1, + "action": "buy", + "symbol": "AAPL", + "amount": 10, + "price": 255.88, + "total": 2558.80 + } + ], + "ai_reasoning": { + "total_steps": 15, + "stop_signal_received": true, + "reasoning_summary": "Market analysis indicated strong buy 
signal for AAPL...", + "tool_usage": { + "search": 3, + "get_price": 5, + "math": 2, + "trade": 1 + } + }, + "log_file_path": "data/agent_data/gpt-5/log/2025-01-15/log.jsonl" + } + ] +} +``` + +**Response (400 Bad Request):** +```json +{ + "error": "invalid_date", + "message": "Date must be in YYYY-MM-DD format" +} +``` + +**Response (404 Not Found):** +```json +{ + "error": "no_data", + "message": "No simulation data found for date 2025-01-15 and model gpt-5" +} +``` + +**Business Logic:** +1. Validate date format +2. Read `position.jsonl` for specified model(s) and date +3. For `detail=minimal`: Return positions + calculate daily P&L +4. For `detail=full`: + - Parse `log.jsonl` to extract reasoning summary + - Count tool usage from log messages + - Extract trades from position file +5. Return aggregated results + +--- + +### 1.5 GET /health + +**Purpose:** Health check endpoint for Docker and monitoring. + +**Request:** +```http +GET /health HTTP/1.1 +``` + +**Response (200 OK):** +```json +{ + "status": "healthy", + "timestamp": "2025-01-20T14:30:00Z", + "services": { + "mcp_math": {"status": "up", "url": "http://localhost:8000/mcp"}, + "mcp_search": {"status": "up", "url": "http://localhost:8001/mcp"}, + "mcp_trade": {"status": "up", "url": "http://localhost:8002/mcp"}, + "mcp_getprice": {"status": "up", "url": "http://localhost:8003/mcp"} + }, + "storage": { + "data_directory": "/app/data", + "writable": true, + "free_space_mb": 15234 + }, + "database": { + "status": "connected", + "path": "/app/data/jobs.db" + } +} +``` + +**Response (503 Service Unavailable):** +```json +{ + "status": "unhealthy", + "timestamp": "2025-01-20T14:30:00Z", + "services": { + "mcp_math": {"status": "down", "url": "http://localhost:8000/mcp", "error": "Connection refused"}, + "mcp_search": {"status": "up", "url": "http://localhost:8001/mcp"}, + "mcp_trade": {"status": "up", "url": "http://localhost:8002/mcp"}, + "mcp_getprice": {"status": "up", "url": "http://localhost:8003/mcp"} + 
}, + "storage": { + "data_directory": "/app/data", + "writable": true + }, + "database": { + "status": "connected" + } +} +``` + +--- + +## 2. Data Models + +### 2.1 SQLite Schema + +**Table: jobs** +```sql +CREATE TABLE jobs ( + job_id TEXT PRIMARY KEY, + config_path TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')), + date_range TEXT NOT NULL, -- JSON array of dates + models TEXT NOT NULL, -- JSON array of model signatures + created_at TEXT NOT NULL, + started_at TEXT, + completed_at TEXT, + total_duration_seconds REAL, + error TEXT +); + +CREATE INDEX idx_jobs_status ON jobs(status); +CREATE INDEX idx_jobs_created_at ON jobs(created_at DESC); +``` + +**Table: job_details** +```sql +CREATE TABLE job_details ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')), + started_at TEXT, + completed_at TEXT, + duration_seconds REAL, + error TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE +); + +CREATE INDEX idx_job_details_job_id ON job_details(job_id); +CREATE INDEX idx_job_details_status ON job_details(status); +``` + +### 2.2 Pydantic Models + +**Request Models:** +```python +from pydantic import BaseModel, Field +from typing import Optional, Literal + +class TriggerSimulationRequest(BaseModel): + config_path: Optional[str] = Field(default="configs/default_config.json", description="Path to configuration file") + +class ResultsQueryParams(BaseModel): + date: str = Field(..., pattern=r"^\d{4}-\d{2}-\d{2}$", description="Date in YYYY-MM-DD format") + model: Optional[str] = Field(None, description="Model signature filter") + detail: Literal["minimal", "full"] = Field(default="minimal", description="Response detail level") +``` + +**Response Models:** +```python +class JobProgress(BaseModel): + total_model_days: int + completed: int + failed: int + 
current: Optional[dict] = None # {"date": str, "model": str} + details: Optional[list] = None # List of JobDetailResponse + +class TriggerSimulationResponse(BaseModel): + job_id: str + status: str + date_range: list[str] + models: list[str] + created_at: str + message: str + progress: Optional[JobProgress] = None + +class JobStatusResponse(BaseModel): + job_id: str + status: str + date_range: list[str] + models: list[str] + progress: JobProgress + created_at: str + updated_at: Optional[str] = None + completed_at: Optional[str] = None + total_duration_seconds: Optional[float] = None + +class DailyPnL(BaseModel): + profit: float + return_pct: float + portfolio_value: float + +class Trade(BaseModel): + id: int + action: str + symbol: str + amount: int + price: Optional[float] = None + total: Optional[float] = None + +class AIReasoning(BaseModel): + total_steps: int + stop_signal_received: bool + reasoning_summary: str + tool_usage: dict[str, int] + +class ModelResult(BaseModel): + model: str + positions: dict[str, float] + daily_pnl: DailyPnL + trades: Optional[list[Trade]] = None + ai_reasoning: Optional[AIReasoning] = None + log_file_path: Optional[str] = None + +class ResultsResponse(BaseModel): + date: str + results: list[ModelResult] +``` + +--- + +## 3. Configuration Management + +### 3.1 Environment Variables + +Required environment variables remain the same as batch mode: +```bash +# OpenAI API Configuration +OPENAI_API_BASE=https://api.openai.com/v1 +OPENAI_API_KEY=sk-... + +# Alpha Vantage API +ALPHAADVANTAGE_API_KEY=... + +# Jina Search API +JINA_API_KEY=... 
+ +# Runtime Config Path (now shared by API and worker) +RUNTIME_ENV_PATH=/app/data/runtime_env.json + +# MCP Service Ports +MATH_HTTP_PORT=8000 +SEARCH_HTTP_PORT=8001 +TRADE_HTTP_PORT=8002 +GETPRICE_HTTP_PORT=8003 + +# API Server Configuration +API_HOST=0.0.0.0 +API_PORT=8080 + +# Job Configuration +MAX_CONCURRENT_JOBS=1 # Only one simulation job at a time +``` + +### 3.2 Runtime State Management + +**Challenge:** Multiple model-days running concurrently need isolated `runtime_env.json` state. + +**Solution:** Per-job runtime config files +- `runtime_env_base.json` - Template +- `runtime_env_{job_id}_{model}_{date}.json` - Job-specific runtime config +- Worker passes custom `RUNTIME_ENV_PATH` to each simulation execution + +**Modified `write_config_value()` and `get_config_value()`:** +- Accept optional `runtime_path` parameter +- Worker manages lifecycle: create โ†’ use โ†’ cleanup + +--- + +## 4. Error Handling + +### 4.1 Error Response Format + +All errors follow this structure: +```json +{ + "error": "error_code", + "message": "Human-readable error description", + "details": { + // Optional additional context + } +} +``` + +### 4.2 HTTP Status Codes + +- `200 OK` - Successful request +- `202 Accepted` - Job queued successfully +- `400 Bad Request` - Invalid input parameters +- `404 Not Found` - Resource not found (job, results) +- `409 Conflict` - Concurrent job conflict +- `500 Internal Server Error` - Unexpected server error +- `503 Service Unavailable` - Health check failed + +### 4.3 Retry Strategy for Workers + +Models run independently - failure of one model doesn't block others: +```python +async def run_model_day(job_id: str, date: str, model_config: dict): + try: + # Execute simulation for this model-day + await agent.run_trading_session(date) + update_job_detail_status(job_id, date, model, "completed") + except Exception as e: + # Log error, update status to failed, continue with next model-day + update_job_detail_status(job_id, date, model, 
"failed", error=str(e)) + # Do NOT raise - let other models continue +``` + +--- + +## 5. Concurrency & Locking + +### 5.1 Job Execution Policy + +**Rule:** Maximum 1 running job at a time (configurable via `MAX_CONCURRENT_JOBS`) + +**Enforcement:** +```python +def can_start_new_job() -> bool: + running_jobs = db.query( + "SELECT COUNT(*) FROM jobs WHERE status IN ('pending', 'running')" + ).fetchone()[0] + return running_jobs < MAX_CONCURRENT_JOBS +``` + +### 5.2 Position File Concurrency + +**Challenge:** Multiple model-days writing to same model's `position.jsonl` + +**Solution:** Sequential execution per model +```python +# For each date in date_range: +# For each model in parallel: โ† Models run in parallel +# Execute model-day sequentially โ† Dates for same model run sequentially +``` + +**Execution Pattern:** +``` +Date 2025-01-16: + - Model A (running) + - Model B (running) + - Model C (running) + +Date 2025-01-17: โ† Starts only after all models finish 2025-01-16 + - Model A (running) + - Model B (running) + - Model C (running) +``` + +**Rationale:** +- Models write to different position files โ†’ No conflict +- Same model's dates run sequentially โ†’ No race condition on position.jsonl +- Date-level parallelism across models โ†’ Faster overall execution + +--- + +## 6. Performance Considerations + +### 6.1 Execution Time Estimates + +Based on current implementation: +- Single model-day: ~30-60 seconds (depends on AI model latency + tool calls) +- 3 models ร— 5 days = 15 model-days โ‰ˆ 7.5-15 minutes (parallel execution) + +### 6.2 Timeout Configuration + +**API Request Timeout:** +- `/simulate/trigger`: 10 seconds (just queue job) +- `/simulate/status`: 5 seconds (read from DB) +- `/results`: 30 seconds (file I/O + parsing) + +**Worker Timeout:** +- Per model-day: 5 minutes (inherited from `max_retries` ร— `base_delay`) +- Entire job: No timeout (job runs until all model-days complete or fail) + +### 6.3 Optimization Opportunities (Future) + +1. 
**Results caching:** Store computed daily_pnl in SQLite to avoid recomputation +2. **Parallel date execution:** If position file locking is implemented, run dates in parallel +3. **Streaming responses:** For `/simulate/status`, use SSE to push updates instead of polling + +--- + +## 7. Logging & Observability + +### 7.1 Structured Logging + +All API logs use JSON format: +```json +{ + "timestamp": "2025-01-20T14:30:00Z", + "level": "INFO", + "logger": "api.worker", + "message": "Starting simulation for model-day", + "job_id": "550e8400-...", + "date": "2025-01-16", + "model": "gpt-5" +} +``` + +### 7.2 Log Levels + +- `DEBUG` - Detailed execution flow (tool calls, price fetches) +- `INFO` - Job lifecycle events (created, started, completed) +- `WARNING` - Recoverable errors (retry attempts) +- `ERROR` - Model-day failures (logged but job continues) +- `CRITICAL` - System failures (MCP services down, DB corruption) + +### 7.3 Audit Trail + +All job state transitions logged to `api_audit.log`: +```json +{ + "timestamp": "2025-01-20T14:30:00Z", + "event": "job_created", + "job_id": "550e8400-...", + "user": "windmill-service", // Future: from auth header + "details": {"date_range": [...], "models": [...]} +} +``` + +--- + +## 8. Security Considerations + +### 8.1 Authentication (Future) + +For MVP, API relies on network isolation (Docker network). Future enhancements: +- API key authentication via header: `X-API-Key: ` +- JWT tokens for Windmill integration +- Rate limiting per API key + +### 8.2 Input Validation + +- All date parameters validated with regex: `^\d{4}-\d{2}-\d{2}$` +- Config paths restricted to `configs/` directory (prevent path traversal) +- Model signatures sanitized (alphanumeric + hyphens only) + +### 8.3 File Access Controls + +- Results API only reads from `data/agent_data/` directory +- Config API only reads from `configs/` directory +- No arbitrary file read via API parameters + +--- + +## 9. 
Deployment Configuration
+
+### 9.1 Docker Compose
+
+```yaml
+version: '3.8'
+
+services:
+  ai-trader-api:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./data:/app/data
+      - ./configs:/app/configs
+    env_file:
+      - .env
+    environment:
+      - MODE=api
+      - API_PORT=8080
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    restart: unless-stopped
+```
+
+### 9.2 Dockerfile Modifications
+
+```dockerfile
+# ... existing layers ...
+
+# Install API dependencies
+COPY requirements-api.txt /app/
+RUN pip install --no-cache-dir -r requirements-api.txt
+
+# Copy API application code
+COPY api/ /app/api/
+
+# Copy entrypoint script
+COPY docker-entrypoint.sh /app/
+RUN chmod +x /app/docker-entrypoint.sh
+
+EXPOSE 8080
+
+CMD ["/app/docker-entrypoint.sh"]
+```
+
+### 9.3 Entrypoint Script
+
+```bash
+#!/bin/bash
+set -e
+
+echo "Starting MCP services..."
+cd /app/agent_tools
+python start_mcp_services.py &
+MCP_PID=$!
+
+# Cleanup on exit. NOTE: the trap must be registered BEFORE the blocking
+# uvicorn call below — registering it afterwards would never take effect
+# while the server is running.
+trap "kill $MCP_PID 2>/dev/null || true" EXIT
+
+echo "Waiting for MCP services to be ready..."
+sleep 10
+
+echo "Starting API server..."
+cd /app
+uvicorn api.main:app --host ${API_HOST:-0.0.0.0} --port ${API_PORT:-8080} --workers 1
+```
+
+---
+
+## 10. API Versioning (Future)
+
+For v2 and beyond:
+- URL prefix: `/api/v1/simulate/trigger`, `/api/v2/simulate/trigger`
+- Header-based: `Accept: application/vnd.ai-trader.v1+json`
+
+MVP uses unversioned endpoints (implied v1).
+
+---
+
+## Next Steps
+
+After reviewing this specification, we'll proceed to:
+1. **Component 2:** Job Manager & SQLite Schema Implementation
+2. **Component 3:** Background Worker Architecture
+3. **Component 4:** BaseAgent Refactoring for Single-Day Execution
+4. **Component 5:** Docker & Deployment Configuration
+5. 
**Component 6:** Windmill Integration Flows + +Please review this API specification and provide feedback or approval to continue. diff --git a/docs/database-enhanced-specification.md b/docs/database-enhanced-specification.md new file mode 100644 index 0000000..e7abcb5 --- /dev/null +++ b/docs/database-enhanced-specification.md @@ -0,0 +1,911 @@ +# Enhanced Database Specification - Results Storage in SQLite + +## 1. Overview + +**Change from Original Spec:** Instead of reading `position.jsonl` on-demand, simulation results are written to SQLite during execution for faster retrieval and queryability. + +**Benefits:** +- **Faster `/results` endpoint** - No file I/O on every request +- **Advanced querying** - Filter by date range, model, performance metrics +- **Aggregations** - Portfolio timeseries, leaderboards, statistics +- **Data integrity** - Single source of truth with ACID guarantees +- **Backup/restore** - Single database file instead of scattered JSONL files + +**Tradeoff:** Additional database writes during simulation (minimal performance impact) + +--- + +## 2. 
Enhanced Database Schema + +### 2.1 Complete Table Structure + +```sql +-- Job tracking tables (from original spec) +CREATE TABLE IF NOT EXISTS jobs ( + job_id TEXT PRIMARY KEY, + config_path TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')), + date_range TEXT NOT NULL, + models TEXT NOT NULL, + created_at TEXT NOT NULL, + started_at TEXT, + completed_at TEXT, + total_duration_seconds REAL, + error TEXT +); + +CREATE TABLE IF NOT EXISTS job_details ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')), + started_at TEXT, + completed_at TEXT, + duration_seconds REAL, + error TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE +); + +-- NEW: Simulation results storage +CREATE TABLE IF NOT EXISTS positions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + action_id INTEGER NOT NULL, -- Sequence number within that day + action_type TEXT CHECK(action_type IN ('buy', 'sell', 'no_trade')), + symbol TEXT, + amount INTEGER, + price REAL, + cash REAL NOT NULL, + portfolio_value REAL NOT NULL, + daily_profit REAL, + daily_return_pct REAL, + cumulative_profit REAL, + cumulative_return_pct REAL, + created_at TEXT NOT NULL, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS holdings ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + position_id INTEGER NOT NULL, + symbol TEXT NOT NULL, + quantity INTEGER NOT NULL, + FOREIGN KEY (position_id) REFERENCES positions(id) ON DELETE CASCADE +); + +-- NEW: AI reasoning logs (optional - for detail=full) +CREATE TABLE IF NOT EXISTS reasoning_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + step_number INTEGER NOT NULL, + timestamp TEXT NOT NULL, + 
role TEXT CHECK(role IN ('user', 'assistant', 'tool')), + content TEXT, + tool_name TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE +); + +-- NEW: Tool usage statistics +CREATE TABLE IF NOT EXISTS tool_usage ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + tool_name TEXT NOT NULL, + call_count INTEGER NOT NULL DEFAULT 1, + total_duration_seconds REAL, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE +); + +-- Indexes for performance +CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status); +CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_job_details_job_id ON job_details(job_id); +CREATE INDEX IF NOT EXISTS idx_job_details_status ON job_details(status); +CREATE UNIQUE INDEX IF NOT EXISTS idx_job_details_unique ON job_details(job_id, date, model); + +CREATE INDEX IF NOT EXISTS idx_positions_job_id ON positions(job_id); +CREATE INDEX IF NOT EXISTS idx_positions_date ON positions(date); +CREATE INDEX IF NOT EXISTS idx_positions_model ON positions(model); +CREATE INDEX IF NOT EXISTS idx_positions_date_model ON positions(date, model); +CREATE UNIQUE INDEX IF NOT EXISTS idx_positions_unique ON positions(job_id, date, model, action_id); + +CREATE INDEX IF NOT EXISTS idx_holdings_position_id ON holdings(position_id); +CREATE INDEX IF NOT EXISTS idx_holdings_symbol ON holdings(symbol); + +CREATE INDEX IF NOT EXISTS idx_reasoning_logs_job_date_model ON reasoning_logs(job_id, date, model); +CREATE INDEX IF NOT EXISTS idx_tool_usage_job_date_model ON tool_usage(job_id, date, model); +``` + +--- + +### 2.2 Table Relationships + +``` +jobs (1) โ”€โ”€โ”ฌโ”€โ”€> (N) job_details + โ”‚ + โ”œโ”€โ”€> (N) positions โ”€โ”€> (N) holdings + โ”‚ + โ”œโ”€โ”€> (N) reasoning_logs + โ”‚ + โ””โ”€โ”€> (N) tool_usage +``` + +--- + +### 2.3 Data Examples + +#### positions table +``` +id | job_id | date | model | action_id | action_type | 
symbol | amount | price | cash | portfolio_value | daily_profit | daily_return_pct | cumulative_profit | cumulative_return_pct | created_at +---|------------|------------|-------|-----------|-------------|--------|--------|--------|---------|-----------------|--------------|------------------|-------------------|----------------------|------------ +1 | abc-123... | 2025-01-16 | gpt-5 | 0 | no_trade | NULL | NULL | NULL | 10000.0 | 10000.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2025-01-16T09:30:00Z +2 | abc-123... | 2025-01-16 | gpt-5 | 1 | buy | AAPL | 10 | 255.88 | 7441.2 | 10000.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2025-01-16T09:35:12Z +3 | abc-123... | 2025-01-17 | gpt-5 | 0 | no_trade | NULL | NULL | NULL | 7441.2 | 10150.5 | 150.5 | 1.51 | 150.5 | 1.51 | 2025-01-17T09:30:00Z +4 | abc-123... | 2025-01-17 | gpt-5 | 1 | sell | AAPL | 5 | 262.24 | 8752.4 | 10150.5 | 150.5 | 1.51 | 150.5 | 1.51 | 2025-01-17T09:42:38Z +``` + +#### holdings table +``` +id | position_id | symbol | quantity +---|-------------|--------|---------- +1 | 2 | AAPL | 10 +2 | 3 | AAPL | 10 +3 | 4 | AAPL | 5 +``` + +#### tool_usage table +``` +id | job_id | date | model | tool_name | call_count | total_duration_seconds +---|------------|------------|-------|------------|------------|----------------------- +1 | abc-123... | 2025-01-16 | gpt-5 | get_price | 5 | 2.3 +2 | abc-123... | 2025-01-16 | gpt-5 | search | 3 | 12.7 +3 | abc-123... | 2025-01-16 | gpt-5 | trade | 1 | 0.8 +4 | abc-123... | 2025-01-16 | gpt-5 | math | 2 | 0.1 +``` + +--- + +## 3. Data Migration from position.jsonl + +### 3.1 Migration Strategy + +**During execution:** Write to BOTH SQLite AND position.jsonl for backward compatibility + +**Migration path:** +1. **Phase 1:** Dual-write mode (write to both SQLite and JSONL) +2. **Phase 2:** Verify SQLite data matches JSONL +3. **Phase 3:** Switch `/results` endpoint to read from SQLite +4. 
**Phase 4:** (Optional) Deprecate JSONL writes + +**Import existing data:** One-time migration script to populate SQLite from existing position.jsonl files + +--- + +### 3.2 Import Script + +```python +# api/import_historical_data.py + +import json +import sqlite3 +from pathlib import Path +from datetime import datetime +from api.database import get_db_connection + +def import_position_jsonl( + model_signature: str, + position_file: Path, + job_id: str = "historical-import" +) -> int: + """ + Import existing position.jsonl data into SQLite. + + Args: + model_signature: Model signature (e.g., "gpt-5") + position_file: Path to position.jsonl + job_id: Job ID to associate with (use "historical-import" for existing data) + + Returns: + Number of records imported + """ + conn = get_db_connection() + cursor = conn.cursor() + + imported_count = 0 + initial_cash = 10000.0 + + with open(position_file, 'r') as f: + for line in f: + if not line.strip(): + continue + + record = json.loads(line) + date = record['date'] + action_id = record['id'] + action = record.get('this_action', {}) + positions = record.get('positions', {}) + + # Extract action details + action_type = action.get('action', 'no_trade') + symbol = action.get('symbol', None) + amount = action.get('amount', None) + price = None # Not stored in original position.jsonl + + # Extract holdings + cash = positions.get('CASH', 0.0) + holdings = {k: v for k, v in positions.items() if k != 'CASH' and v > 0} + + # Calculate portfolio value (approximate - need price data) + portfolio_value = cash # Base value + + # Calculate profits (need previous record) + daily_profit = 0.0 + daily_return_pct = 0.0 + cumulative_profit = cash - initial_cash # Simplified + cumulative_return_pct = (cumulative_profit / initial_cash) * 100 + + # Insert position record + cursor.execute(""" + INSERT INTO positions ( + job_id, date, model, action_id, action_type, symbol, amount, price, + cash, portfolio_value, daily_profit, daily_return_pct, + 
cumulative_profit, cumulative_return_pct, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + job_id, date, model_signature, action_id, action_type, symbol, amount, price, + cash, portfolio_value, daily_profit, daily_return_pct, + cumulative_profit, cumulative_return_pct, datetime.utcnow().isoformat() + "Z" + )) + + position_id = cursor.lastrowid + + # Insert holdings + for sym, qty in holdings.items(): + cursor.execute(""" + INSERT INTO holdings (position_id, symbol, quantity) + VALUES (?, ?, ?) + """, (position_id, sym, qty)) + + imported_count += 1 + + conn.commit() + conn.close() + + return imported_count + + +def import_all_historical_data(base_path: Path = Path("data/agent_data")) -> dict: + """ + Import all existing position.jsonl files from data/agent_data/. + + Returns: + Summary dict with import counts per model + """ + summary = {} + + for model_dir in base_path.iterdir(): + if not model_dir.is_dir(): + continue + + model_signature = model_dir.name + position_file = model_dir / "position" / "position.jsonl" + + if not position_file.exists(): + continue + + print(f"Importing {model_signature}...") + count = import_position_jsonl(model_signature, position_file) + summary[model_signature] = count + print(f" Imported {count} records") + + return summary + + +if __name__ == "__main__": + print("Starting historical data import...") + summary = import_all_historical_data() + print(f"\nImport complete: {summary}") + print(f"Total records: {sum(summary.values())}") +``` + +--- + +## 4. Updated Results Service + +### 4.1 ResultsService Class + +```python +# api/results_service.py + +from typing import List, Dict, Optional +from datetime import datetime +from api.database import get_db_connection + +class ResultsService: + """ + Service for retrieving simulation results from SQLite. + + Replaces on-demand reading of position.jsonl files. 
+ """ + + def __init__(self, db_path: str = "data/jobs.db"): + self.db_path = db_path + + def get_results( + self, + date: str, + model: Optional[str] = None, + detail: str = "minimal" + ) -> Dict: + """ + Get simulation results for specified date and model(s). + + Args: + date: Trading date (YYYY-MM-DD) + model: Optional model signature filter + detail: "minimal" or "full" + + Returns: + { + "date": str, + "results": [ + { + "model": str, + "positions": {...}, + "daily_pnl": {...}, + "trades": [...], // if detail=full + "ai_reasoning": {...} // if detail=full + } + ] + } + """ + conn = get_db_connection(self.db_path) + + # Get all models for this date (or specific model) + if model: + models = [model] + else: + cursor = conn.cursor() + cursor.execute(""" + SELECT DISTINCT model FROM positions WHERE date = ? + """, (date,)) + models = [row[0] for row in cursor.fetchall()] + + results = [] + + for mdl in models: + result = self._get_model_result(conn, date, mdl, detail) + if result: + results.append(result) + + conn.close() + + return { + "date": date, + "results": results + } + + def _get_model_result( + self, + conn, + date: str, + model: str, + detail: str + ) -> Optional[Dict]: + """Get result for single model on single date""" + cursor = conn.cursor() + + # Get latest position for this date (highest action_id) + cursor.execute(""" + SELECT + cash, portfolio_value, daily_profit, daily_return_pct, + cumulative_profit, cumulative_return_pct + FROM positions + WHERE date = ? AND model = ? + ORDER BY action_id DESC + LIMIT 1 + """, (date, model)) + + row = cursor.fetchone() + if not row: + return None + + cash, portfolio_value, daily_profit, daily_return_pct, cumulative_profit, cumulative_return_pct = row + + # Get holdings for latest position + cursor.execute(""" + SELECT h.symbol, h.quantity + FROM holdings h + JOIN positions p ON h.position_id = p.id + WHERE p.date = ? AND p.model = ? 
+ ORDER BY p.action_id DESC + LIMIT 100 -- One position worth of holdings + """, (date, model)) + + holdings = {row[0]: row[1] for row in cursor.fetchall()} + holdings['CASH'] = cash + + result = { + "model": model, + "positions": holdings, + "daily_pnl": { + "profit": daily_profit, + "return_pct": daily_return_pct, + "portfolio_value": portfolio_value + }, + "cumulative_pnl": { + "profit": cumulative_profit, + "return_pct": cumulative_return_pct + } + } + + # Add full details if requested + if detail == "full": + result["trades"] = self._get_trades(cursor, date, model) + result["ai_reasoning"] = self._get_reasoning(cursor, date, model) + result["tool_usage"] = self._get_tool_usage(cursor, date, model) + + return result + + def _get_trades(self, cursor, date: str, model: str) -> List[Dict]: + """Get all trades executed on this date""" + cursor.execute(""" + SELECT action_id, action_type, symbol, amount, price + FROM positions + WHERE date = ? AND model = ? AND action_type IN ('buy', 'sell') + ORDER BY action_id + """, (date, model)) + + trades = [] + for row in cursor.fetchall(): + trades.append({ + "id": row[0], + "action": row[1], + "symbol": row[2], + "amount": row[3], + "price": row[4], + "total": row[3] * row[4] if row[3] and row[4] else None + }) + + return trades + + def _get_reasoning(self, cursor, date: str, model: str) -> Dict: + """Get AI reasoning summary""" + cursor.execute(""" + SELECT COUNT(*) as total_steps, + COUNT(CASE WHEN role = 'assistant' THEN 1 END) as assistant_messages, + COUNT(CASE WHEN role = 'tool' THEN 1 END) as tool_messages + FROM reasoning_logs + WHERE date = ? AND model = ? + """, (date, model)) + + row = cursor.fetchone() + total_steps = row[0] if row else 0 + + # Get reasoning summary (last assistant message with FINISH_SIGNAL) + cursor.execute(""" + SELECT content FROM reasoning_logs + WHERE date = ? AND model = ? 
AND role = 'assistant'
+                AND content LIKE '%<FINISH_SIGNAL>%'
+            ORDER BY step_number DESC
+            LIMIT 1
+        """, (date, model))
+
+        row = cursor.fetchone()
+        reasoning_summary = row[0] if row else "No reasoning summary available"
+
+        return {
+            "total_steps": total_steps,
+            "stop_signal_received": "<FINISH_SIGNAL>" in reasoning_summary,
+            "reasoning_summary": reasoning_summary[:500]  # Truncate for brevity
+        }
+
+    def _get_tool_usage(self, cursor, date: str, model: str) -> Dict[str, int]:
+        """Get tool usage counts"""
+        cursor.execute("""
+            SELECT tool_name, call_count
+            FROM tool_usage
+            WHERE date = ? AND model = ?
+        """, (date, model))
+
+        return {row[0]: row[1] for row in cursor.fetchall()}
+
+    def get_portfolio_timeseries(
+        self,
+        model: str,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None
+    ) -> List[Dict]:
+        """
+        Get portfolio value over time for a model.
+
+        Returns:
+            [
+                {"date": "2025-01-16", "portfolio_value": 10000.0, "daily_return_pct": 0.0},
+                {"date": "2025-01-17", "portfolio_value": 10150.5, "daily_return_pct": 1.51},
+                ...
+            ]
+        """
+        conn = get_db_connection(self.db_path)
+        cursor = conn.cursor()
+
+        query = """
+            SELECT date, portfolio_value, daily_return_pct, cumulative_return_pct
+            FROM (
+                SELECT date, portfolio_value, daily_return_pct, cumulative_return_pct,
+                       ROW_NUMBER() OVER (PARTITION BY date ORDER BY action_id DESC) as rn
+                FROM positions
+                WHERE model = ?
+            )
+            WHERE rn = 1
+        """
+
+        params = [model]
+
+        if start_date:
+            query += " AND date >= ?"
+            params.append(start_date)
+        if end_date:
+            query += " AND date <= ?"
+            params.append(end_date)
+
+        query += " ORDER BY date ASC"
+
+        cursor.execute(query, params)
+
+        timeseries = []
+        for row in cursor.fetchall():
+            timeseries.append({
+                "date": row[0],
+                "portfolio_value": row[1],
+                "daily_return_pct": row[2],
+                "cumulative_return_pct": row[3]
+            })
+
+        conn.close()
+        return timeseries
+
+    def get_leaderboard(self, date: Optional[str] = None) -> List[Dict]:
+        """
+        Get model performance leaderboard. 
+ + Args: + date: Optional date filter (latest results if not specified) + + Returns: + [ + {"model": "gpt-5", "portfolio_value": 10500, "cumulative_return_pct": 5.0, "rank": 1}, + {"model": "claude-3.7-sonnet", "portfolio_value": 10300, "cumulative_return_pct": 3.0, "rank": 2}, + ... + ] + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + if date: + # Specific date leaderboard + cursor.execute(""" + SELECT model, portfolio_value, cumulative_return_pct + FROM ( + SELECT model, portfolio_value, cumulative_return_pct, + ROW_NUMBER() OVER (PARTITION BY model ORDER BY action_id DESC) as rn + FROM positions + WHERE date = ? + ) + WHERE rn = 1 + ORDER BY portfolio_value DESC + """, (date,)) + else: + # Latest results for each model + cursor.execute(""" + SELECT model, portfolio_value, cumulative_return_pct + FROM ( + SELECT model, portfolio_value, cumulative_return_pct, + ROW_NUMBER() OVER (PARTITION BY model ORDER BY date DESC, action_id DESC) as rn + FROM positions + ) + WHERE rn = 1 + ORDER BY portfolio_value DESC + """) + + leaderboard = [] + rank = 1 + for row in cursor.fetchall(): + leaderboard.append({ + "rank": rank, + "model": row[0], + "portfolio_value": row[1], + "cumulative_return_pct": row[2] + }) + rank += 1 + + conn.close() + return leaderboard +``` + +--- + +## 5. Updated Executor - Write to SQLite + +```python +# api/executor.py (additions to existing code) + +class ModelDayExecutor: + # ... existing code ... + + async def run_model_day( + self, + job_id: str, + date: str, + model_config: Dict[str, Any], + agent_class: type, + config: Dict[str, Any] + ) -> None: + """Execute simulation for one model on one date""" + + # ... existing execution code ... 
+ + try: + # Execute trading session + await agent.run_trading_session(date) + + # NEW: Extract and store results in SQLite + self._store_results_to_db(job_id, date, model_sig) + + # Mark as completed + self.job_manager.update_job_detail_status( + job_id, date, model_sig, "completed" + ) + + except Exception as e: + # ... error handling ... + + def _store_results_to_db(self, job_id: str, date: str, model: str) -> None: + """ + Extract data from position.jsonl and log.jsonl, store in SQLite. + + This runs after agent.run_trading_session() completes. + """ + from api.database import get_db_connection + from pathlib import Path + import json + + conn = get_db_connection() + cursor = conn.cursor() + + # Read position.jsonl for this model + position_file = Path(f"data/agent_data/{model}/position/position.jsonl") + + if not position_file.exists(): + logger.warning(f"Position file not found: {position_file}") + return + + # Find records for this date + with open(position_file, 'r') as f: + for line in f: + if not line.strip(): + continue + + record = json.loads(line) + if record['date'] != date: + continue # Skip other dates + + # Extract fields + action_id = record['id'] + action = record.get('this_action', {}) + positions = record.get('positions', {}) + + action_type = action.get('action', 'no_trade') + symbol = action.get('symbol') + amount = action.get('amount') + price = None # TODO: Get from price data if needed + + cash = positions.get('CASH', 0.0) + holdings = {k: v for k, v in positions.items() if k != 'CASH' and v > 0} + + # Calculate portfolio value (simplified - improve with actual prices) + portfolio_value = cash # + sum(holdings value) + + # Calculate daily P&L (compare to previous day's closing value) + # TODO: Implement proper P&L calculation + + # Insert position + cursor.execute(""" + INSERT INTO positions ( + job_id, date, model, action_id, action_type, symbol, amount, price, + cash, portfolio_value, daily_profit, daily_return_pct, + cumulative_profit, 
cumulative_return_pct, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + job_id, date, model, action_id, action_type, symbol, amount, price, + cash, portfolio_value, 0.0, 0.0, # TODO: Calculate P&L + 0.0, 0.0, # TODO: Calculate cumulative P&L + datetime.utcnow().isoformat() + "Z" + )) + + position_id = cursor.lastrowid + + # Insert holdings + for sym, qty in holdings.items(): + cursor.execute(""" + INSERT INTO holdings (position_id, symbol, quantity) + VALUES (?, ?, ?) + """, (position_id, sym, qty)) + + # Parse log.jsonl for reasoning (if detail=full is needed later) + # TODO: Implement log parsing and storage in reasoning_logs table + + conn.commit() + conn.close() + + logger.info(f"Stored results for {model} on {date} in SQLite") +``` + +--- + +## 6. Migration Path + +### 6.1 Backward Compatibility + +**Keep position.jsonl writes** to ensure existing tools/scripts continue working: + +```python +# In agent/base_agent/base_agent.py - no changes needed +# position.jsonl writing continues as normal + +# In api/executor.py - AFTER position.jsonl is written +await agent.run_trading_session(date) # Writes to position.jsonl +self._store_results_to_db(job_id, date, model_sig) # Copies to SQLite +``` + +### 6.2 Gradual Migration + +**Week 1:** Deploy with dual-write (JSONL + SQLite) +**Week 2:** Verify data consistency, fix any discrepancies +**Week 3:** Switch `/results` endpoint to read from SQLite +**Week 4:** (Optional) Remove JSONL writes + +--- + +## 7. 
Updated API Endpoints + +### 7.1 Enhanced `/results` Endpoint + +```python +# api/main.py + +from api.results_service import ResultsService + +results_service = ResultsService() + +@app.get("/results") +async def get_results( + date: str, + model: Optional[str] = None, + detail: str = "minimal" +): + """Get simulation results from SQLite (fast!)""" + # Validate date format + try: + datetime.strptime(date, "%Y-%m-%d") + except ValueError: + raise HTTPException(status_code=400, detail="Invalid date format (use YYYY-MM-DD)") + + results = results_service.get_results(date, model, detail) + + if not results["results"]: + raise HTTPException(status_code=404, detail=f"No data found for date {date}") + + return results +``` + +### 7.2 New Endpoints for Advanced Queries + +```python +@app.get("/portfolio/timeseries") +async def get_portfolio_timeseries( + model: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None +): + """Get portfolio value over time for a model""" + timeseries = results_service.get_portfolio_timeseries(model, start_date, end_date) + + if not timeseries: + raise HTTPException(status_code=404, detail=f"No data found for model {model}") + + return { + "model": model, + "timeseries": timeseries + } + + +@app.get("/leaderboard") +async def get_leaderboard(date: Optional[str] = None): + """Get model performance leaderboard""" + leaderboard = results_service.get_leaderboard(date) + + return { + "date": date or "latest", + "leaderboard": leaderboard + } +``` + +--- + +## 8. Database Maintenance + +### 8.1 Cleanup Old Data + +```python +# api/job_manager.py (add method) + +def cleanup_old_data(self, days: int = 90) -> dict: + """ + Delete jobs and associated data older than specified days. 
+ + Returns: + Summary of deleted records + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cutoff_date = (datetime.utcnow() - timedelta(days=days)).isoformat() + "Z" + + # Count records before deletion + cursor.execute("SELECT COUNT(*) FROM jobs WHERE created_at < ?", (cutoff_date,)) + jobs_to_delete = cursor.fetchone()[0] + + cursor.execute(""" + SELECT COUNT(*) FROM positions + WHERE job_id IN (SELECT job_id FROM jobs WHERE created_at < ?) + """, (cutoff_date,)) + positions_to_delete = cursor.fetchone()[0] + + # Delete (CASCADE will handle related tables) + cursor.execute("DELETE FROM jobs WHERE created_at < ?", (cutoff_date,)) + + conn.commit() + conn.close() + + return { + "cutoff_date": cutoff_date, + "jobs_deleted": jobs_to_delete, + "positions_deleted": positions_to_delete + } +``` + +### 8.2 Vacuum Database + +```python +def vacuum_database(self) -> None: + """Reclaim disk space after deletes""" + conn = get_db_connection(self.db_path) + conn.execute("VACUUM") + conn.close() +``` + +--- + +## Summary + +**Enhanced database schema** with 6 tables: +- `jobs`, `job_details` (job tracking) +- `positions`, `holdings` (simulation results) +- `reasoning_logs`, `tool_usage` (AI details) + +**Benefits:** +- โšก **10-100x faster** `/results` queries (no file I/O) +- ๐Ÿ“Š **Advanced analytics** - timeseries, leaderboards, aggregations +- ๐Ÿ”’ **Data integrity** - ACID compliance, foreign keys +- ๐Ÿ—„๏ธ **Single source of truth** - all data in one place + +**Migration strategy:** Dual-write (JSONL + SQLite) for backward compatibility + +**Next:** Comprehensive testing suite specification diff --git a/docs/implementation-specifications.md b/docs/implementation-specifications.md new file mode 100644 index 0000000..d1394d8 --- /dev/null +++ b/docs/implementation-specifications.md @@ -0,0 +1,873 @@ +# Implementation Specifications: Agent, Docker, and Windmill Integration + +## Part 1: BaseAgent Refactoring + +### 1.1 Current State Analysis + 
+**Current `base_agent.py` structure:** +- `run_date_range(init_date, end_date)` - Loops through all dates +- `run_trading_session(today_date)` - Executes single day +- `get_trading_dates()` - Calculates dates from position.jsonl + +**What works well:** +- `run_trading_session()` is already isolated for single-day execution โœ… +- Agent initialization is separate from execution โœ… +- Position tracking via position.jsonl โœ… + +**What needs modification:** +- `runtime_env.json` management (move to RuntimeConfigManager) +- `get_trading_dates()` logic (move to API layer for date range calculation) + +### 1.2 Required Changes + +#### Change 1: No modifications needed to core execution logic + +**Rationale:** `BaseAgent.run_trading_session(today_date)` already supports single-day execution. The worker will call this method directly. + +```python +# Current code (already suitable for API mode): +async def run_trading_session(self, today_date: str) -> None: + """Run single day trading session""" + # This method is perfect as-is for worker to call +``` + +**Action:** โœ… No changes needed + +--- + +#### Change 2: Make runtime config path injectable + +**Current issue:** +```python +# In base_agent.py, uses global config +from tools.general_tools import get_config_value, write_config_value +``` + +**Problem:** `get_config_value()` reads from `os.environ["RUNTIME_ENV_PATH"]`, which the worker will override per execution. + +**Solution:** Already works! The worker sets `RUNTIME_ENV_PATH` before calling agent methods: + +```python +# In executor.py +os.environ["RUNTIME_ENV_PATH"] = runtime_config_path +await agent.run_trading_session(date) +``` + +**Action:** โœ… No changes needed (env var override is sufficient) + +--- + +#### Change 3: Optional - Separate agent initialization from date-range logic + +**Current code in `main.py`:** +```python +# Creates agent +agent = AgentClass(...) 
+await agent.initialize() + +# Runs all dates +await agent.run_date_range(INIT_DATE, END_DATE) +``` + +**For API mode:** +```python +# Worker creates agent +agent = AgentClass(...) +await agent.initialize() + +# Worker calls run_trading_session directly for each date +for date in date_range: + await agent.run_trading_session(date) +``` + +**Action:** โœ… Worker will not use `run_date_range()` method. No changes needed to agent. + +--- + +### 1.3 Summary: BaseAgent Changes + +**Result:** **NO CODE CHANGES REQUIRED** to `base_agent.py`! + +The existing architecture is already compatible with the API worker pattern: +- `run_trading_session()` is the perfect interface +- Runtime config is managed via environment variables +- Position tracking works as-is + +**Only change needed:** Worker must call `agent.register_agent()` if position file doesn't exist (already handled by `get_trading_dates()` logic). + +--- + +## Part 2: Docker Configuration + +### 2.1 Current Docker Setup + +**Existing files:** +- `Dockerfile` - Multi-stage build for batch mode +- `docker-compose.yml` - Service definition +- `docker-entrypoint.sh` - Launches data fetch + main.py + +### 2.2 Modified Dockerfile + +```dockerfile +# Existing stages remain the same... +FROM python:3.10-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements +COPY requirements.txt requirements-api.txt ./ +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements-api.txt + +# Copy application code +COPY . 
/app + +# Create data directories +RUN mkdir -p /app/data /app/configs + +# Copy and set permissions for entrypoint +COPY docker-entrypoint-api.sh /app/ +RUN chmod +x /app/docker-entrypoint-api.sh + +# Expose API port +EXPOSE 8080 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ + CMD curl -f http://localhost:8080/health || exit 1 + +# Run API service +CMD ["/app/docker-entrypoint-api.sh"] +``` + +### 2.3 New requirements-api.txt + +``` +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +pydantic-settings==2.1.0 +python-multipart==0.0.6 +``` + +### 2.4 New docker-entrypoint-api.sh + +```bash +#!/bin/bash +set -e + +echo "==================================" +echo "AI-Trader API Service Starting" +echo "==================================" + +# Cleanup stale runtime configs from previous runs +echo "Cleaning up stale runtime configs..." +python3 -c "from api.runtime_manager import RuntimeConfigManager; RuntimeConfigManager().cleanup_all_runtime_configs()" + +# Start MCP services in background +echo "Starting MCP services..." +cd /app/agent_tools +python3 start_mcp_services.py & +MCP_PID=$! + +# Wait for MCP services to be ready +echo "Waiting for MCP services to initialize..." +sleep 10 + +# Verify MCP services are running +echo "Verifying MCP services..." +for port in ${MATH_HTTP_PORT:-8000} ${SEARCH_HTTP_PORT:-8001} ${TRADE_HTTP_PORT:-8002} ${GETPRICE_HTTP_PORT:-8003}; do + if ! curl -f -s http://localhost:$port/health > /dev/null 2>&1; then + echo "WARNING: MCP service on port $port not responding" + else + echo "โœ“ MCP service on port $port is healthy" + fi +done + +# Start API server +echo "Starting FastAPI server..." 
+
+cd /app
+
+# Use environment variables for host and port
+API_HOST=${API_HOST:-0.0.0.0}
+API_PORT=${API_PORT:-8080}
+
+echo "API will be available at http://${API_HOST}:${API_PORT}"
+echo "=================================="
+
+# Cleanup function (called on exit) - registered BEFORE starting uvicorn.
+# Note: `exec` is deliberately NOT used to launch uvicorn, because exec would
+# replace this shell and the trap below would never fire, leaving the MCP
+# background services running after shutdown.
+trap "echo 'Shutting down...'; kill $MCP_PID 2>/dev/null || true" EXIT SIGTERM SIGINT
+
+# Start uvicorn with single worker (for simplicity in MVP)
+uvicorn api.main:app \
+    --host ${API_HOST} \
+    --port ${API_PORT} \
+    --workers 1 \
+    --log-level info
+```
+
+### 2.5 Updated docker-compose.yml
+
+```yaml
+version: '3.8'
+
+services:
+  ai-trader:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ai-trader-api
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./data:/app/data
+      - ./configs:/app/configs
+      - ./logs:/app/logs
+    env_file:
+      - .env
+    environment:
+      - API_HOST=0.0.0.0
+      - API_PORT=8080
+      - RUNTIME_ENV_PATH=/app/data/runtime_env.json
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    restart: unless-stopped
+    networks:
+      - ai-trader-network
+
+networks:
+  ai-trader-network:
+    driver: bridge
+```
+
+### 2.6 Environment Variables Reference
+
+```bash
+# .env file example for API mode
+
+# OpenAI Configuration
+OPENAI_API_BASE=https://api.openai.com/v1
+OPENAI_API_KEY=sk-...
+ +# API Keys +ALPHAADVANTAGE_API_KEY=your_alpha_vantage_key +JINA_API_KEY=your_jina_key + +# MCP Service Ports +MATH_HTTP_PORT=8000 +SEARCH_HTTP_PORT=8001 +TRADE_HTTP_PORT=8002 +GETPRICE_HTTP_PORT=8003 + +# API Configuration +API_HOST=0.0.0.0 +API_PORT=8080 + +# Runtime Config +RUNTIME_ENV_PATH=/app/data/runtime_env.json + +# Job Configuration +MAX_CONCURRENT_JOBS=1 +``` + +### 2.7 Docker Commands Reference + +```bash +# Build image +docker-compose build + +# Start service +docker-compose up + +# Start in background +docker-compose up -d + +# View logs +docker-compose logs -f + +# Check health +docker-compose ps + +# Stop service +docker-compose down + +# Restart service +docker-compose restart + +# Execute command in running container +docker-compose exec ai-trader python3 -c "from api.job_manager import JobManager; jm = JobManager(); print(jm.get_current_job())" + +# Access container shell +docker-compose exec ai-trader bash +``` + +--- + +## Part 3: Windmill Integration + +### 3.1 Windmill Overview + +Windmill (windmill.dev) is a workflow automation platform that can: +- Schedule cron jobs +- Execute TypeScript/Python scripts +- Store state between runs +- Build UI dashboards + +**Integration approach:** +1. Windmill cron job triggers simulation daily +2. Windmill polls for job completion +3. Windmill retrieves results and stores in internal database +4. 
Windmill dashboard displays performance metrics + +### 3.2 Flow 1: Daily Simulation Trigger + +**File:** `windmill/trigger_simulation.ts` + +```typescript +import { Resource } from "https://deno.land/x/windmill@v1.0.0/mod.ts"; + +export async function main( + ai_trader_api: Resource<"ai_trader_api"> +) { + const apiUrl = ai_trader_api.base_url; // e.g., "http://ai-trader:8080" + + // Trigger simulation + const response = await fetch(`${apiUrl}/simulate/trigger`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + config_path: "configs/default_config.json" + }), + }); + + if (!response.ok) { + throw new Error(`API error: ${response.status} ${response.statusText}`); + } + + const data = await response.json(); + + // Handle different response types + if (data.status === "current") { + console.log("Simulation already up-to-date"); + return { + action: "skipped", + message: data.message, + last_date: data.last_simulation_date + }; + } + + // Store job_id in Windmill state for poller to pick up + await Deno.writeTextFile( + `/tmp/current_job_id.txt`, + data.job_id + ); + + console.log(`Simulation triggered: ${data.job_id}`); + console.log(`Date range: ${data.date_range.join(", ")}`); + console.log(`Models: ${data.models.join(", ")}`); + + return { + action: "triggered", + job_id: data.job_id, + date_range: data.date_range, + models: data.models, + status: data.status + }; +} +``` + +**Windmill Resource Configuration:** +```json +{ + "resource_type": "ai_trader_api", + "base_url": "http://ai-trader:8080" +} +``` + +**Schedule:** Every day at 6:00 AM + +--- + +### 3.3 Flow 2: Job Status Poller + +**File:** `windmill/poll_simulation_status.ts` + +```typescript +import { Resource } from "https://deno.land/x/windmill@v1.0.0/mod.ts"; + +export async function main( + ai_trader_api: Resource<"ai_trader_api">, + job_id?: string +) { + const apiUrl = ai_trader_api.base_url; + + // Get job_id from parameter or from current job file 
+ let jobId = job_id; + if (!jobId) { + try { + jobId = await Deno.readTextFile("/tmp/current_job_id.txt"); + } catch { + // No current job + return { + status: "no_job", + message: "No active simulation job" + }; + } + } + + // Poll status + const response = await fetch(`${apiUrl}/simulate/status/${jobId}`); + + if (!response.ok) { + if (response.status === 404) { + return { + status: "not_found", + message: "Job not found", + job_id: jobId + }; + } + throw new Error(`API error: ${response.status}`); + } + + const data = await response.json(); + + console.log(`Job ${jobId}: ${data.status}`); + console.log(`Progress: ${data.progress.completed}/${data.progress.total_model_days} model-days`); + + // If job is complete, retrieve results + if (data.status === "completed" || data.status === "partial") { + console.log("Job finished, retrieving results..."); + + const results = []; + for (const date of data.date_range) { + const resultsResponse = await fetch( + `${apiUrl}/results?date=${date}&detail=minimal` + ); + + if (resultsResponse.ok) { + const dateResults = await resultsResponse.json(); + results.push(dateResults); + } + } + + // Clean up job_id file + try { + await Deno.remove("/tmp/current_job_id.txt"); + } catch { + // Ignore + } + + return { + status: data.status, + job_id: jobId, + completed_at: data.completed_at, + duration_seconds: data.total_duration_seconds, + results: results + }; + } + + // Job still running + return { + status: data.status, + job_id: jobId, + progress: data.progress, + started_at: data.created_at + }; +} +``` + +**Schedule:** Every 5 minutes (will skip if no active job) + +--- + +### 3.4 Flow 3: Results Retrieval and Storage + +**File:** `windmill/store_simulation_results.py` + +```python +import wmill +from datetime import datetime + +def main( + job_results: dict, + database: str = "simulation_results" +): + """ + Store simulation results in Windmill's internal database. 
+ + Args: + job_results: Output from poll_simulation_status flow + database: Database name for storage + """ + if job_results.get("status") not in ("completed", "partial"): + return {"message": "Job not complete, skipping storage"} + + # Extract results + job_id = job_results["job_id"] + results = job_results.get("results", []) + + stored_count = 0 + + for date_result in results: + date = date_result["date"] + + for model_result in date_result["results"]: + model = model_result["model"] + positions = model_result["positions"] + pnl = model_result["daily_pnl"] + + # Store in Windmill database + record = { + "job_id": job_id, + "date": date, + "model": model, + "cash": positions.get("CASH", 0), + "portfolio_value": pnl["portfolio_value"], + "daily_profit": pnl["profit"], + "daily_return_pct": pnl["return_pct"], + "stored_at": datetime.utcnow().isoformat() + } + + # Use Windmill's internal storage + wmill.set_variable( + path=f"{database}/{model}/{date}", + value=record + ) + + stored_count += 1 + + return { + "stored_count": stored_count, + "job_id": job_id, + "message": f"Stored {stored_count} model-day results" + } +``` + +--- + +### 3.5 Windmill Dashboard Example + +**File:** `windmill/dashboard.json` (Windmill App Builder) + +```json +{ + "grid": [ + { + "type": "table", + "id": "performance_table", + "configuration": { + "title": "Model Performance Summary", + "data_source": { + "type": "script", + "path": "f/simulation_results/get_latest_performance" + }, + "columns": [ + {"field": "model", "header": "Model"}, + {"field": "latest_date", "header": "Latest Date"}, + {"field": "portfolio_value", "header": "Portfolio Value"}, + {"field": "total_return_pct", "header": "Total Return %"}, + {"field": "daily_return_pct", "header": "Daily Return %"} + ] + } + }, + { + "type": "chart", + "id": "portfolio_chart", + "configuration": { + "title": "Portfolio Value Over Time", + "chart_type": "line", + "data_source": { + "type": "script", + "path": 
"f/simulation_results/get_timeseries" + }, + "x_axis": "date", + "y_axis": "portfolio_value", + "series": "model" + } + } + ] +} +``` + +**Supporting Script:** `windmill/get_latest_performance.py` + +```python +import wmill + +def main(database: str = "simulation_results"): + """Get latest performance for each model""" + + # Query Windmill variables + all_vars = wmill.list_variables(path_prefix=f"{database}/") + + # Group by model + models = {} + for var in all_vars: + parts = var["path"].split("/") + if len(parts) >= 3: + model = parts[1] + date = parts[2] + + value = wmill.get_variable(var["path"]) + + if model not in models: + models[model] = [] + models[model].append(value) + + # Compute summary for each model + summary = [] + for model, records in models.items(): + # Sort by date + records.sort(key=lambda x: x["date"], reverse=True) + latest = records[0] + + # Calculate total return + initial_value = 10000 # Initial cash + total_return_pct = ((latest["portfolio_value"] - initial_value) / initial_value) * 100 + + summary.append({ + "model": model, + "latest_date": latest["date"], + "portfolio_value": latest["portfolio_value"], + "total_return_pct": round(total_return_pct, 2), + "daily_return_pct": latest["daily_return_pct"] + }) + + return summary +``` + +--- + +### 3.6 Windmill Workflow Orchestration + +**Main Workflow:** `windmill/daily_simulation_workflow.yaml` + +```yaml +name: Daily AI Trader Simulation +description: Trigger simulation, poll status, and store results + +triggers: + - type: cron + schedule: "0 6 * * *" # Every day at 6 AM + +steps: + - id: trigger + name: Trigger Simulation + script: f/ai_trader/trigger_simulation + outputs: + - job_id + - action + + - id: wait + name: Wait for Job Start + type: sleep + duration: 10s + + - id: poll_loop + name: Poll Until Complete + type: loop + max_iterations: 60 # Poll for up to 5 hours (60 ร— 5min) + interval: 5m + script: f/ai_trader/poll_simulation_status + inputs: + job_id: ${{ 
steps.trigger.outputs.job_id }} + break_condition: | + ${{ steps.poll_loop.outputs.status in ['completed', 'partial', 'failed'] }} + + - id: store_results + name: Store Results in Database + script: f/ai_trader/store_simulation_results + inputs: + job_results: ${{ steps.poll_loop.outputs }} + condition: | + ${{ steps.poll_loop.outputs.status in ['completed', 'partial'] }} + + - id: notify + name: Send Notification + type: email + to: admin@example.com + subject: "AI Trader Simulation Complete" + body: | + Simulation completed for ${{ steps.poll_loop.outputs.job_id }} + Status: ${{ steps.poll_loop.outputs.status }} + Duration: ${{ steps.poll_loop.outputs.duration_seconds }}s +``` + +--- + +### 3.7 Testing Windmill Integration Locally + +**1. Start AI-Trader API:** +```bash +docker-compose up -d +``` + +**2. Test trigger endpoint:** +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{"config_path": "configs/default_config.json"}' +``` + +**3. Test status polling:** +```bash +JOB_ID="" +curl http://localhost:8080/simulate/status/$JOB_ID +``` + +**4. Test results retrieval:** +```bash +curl "http://localhost:8080/results?date=2025-01-16&model=gpt-5&detail=minimal" +``` + +**5. 
Deploy to Windmill:** +```bash +# Install Windmill CLI +npm install -g windmill-cli + +# Login to your Windmill instance +wmill login https://your-windmill-instance.com + +# Deploy scripts +wmill script push windmill/trigger_simulation.ts +wmill script push windmill/poll_simulation_status.ts +wmill script push windmill/store_simulation_results.py + +# Deploy workflow +wmill flow push windmill/daily_simulation_workflow.yaml +``` + +--- + +## Part 4: Complete File Structure + +After implementation, the project structure will be: + +``` +AI-Trader/ +โ”œโ”€โ”€ api/ +โ”‚ โ”œโ”€โ”€ __init__.py +โ”‚ โ”œโ”€โ”€ main.py # FastAPI application +โ”‚ โ”œโ”€โ”€ models.py # Pydantic request/response models +โ”‚ โ”œโ”€โ”€ job_manager.py # Job lifecycle management +โ”‚ โ”œโ”€โ”€ database.py # SQLite utilities +โ”‚ โ”œโ”€โ”€ worker.py # Background simulation worker +โ”‚ โ”œโ”€โ”€ executor.py # Single model-day execution +โ”‚ โ””โ”€โ”€ runtime_manager.py # Runtime config isolation +โ”‚ +โ”œโ”€โ”€ docs/ +โ”‚ โ”œโ”€โ”€ api-specification.md +โ”‚ โ”œโ”€โ”€ job-manager-specification.md +โ”‚ โ”œโ”€โ”€ worker-specification.md +โ”‚ โ””โ”€โ”€ implementation-specifications.md +โ”‚ +โ”œโ”€โ”€ windmill/ +โ”‚ โ”œโ”€โ”€ trigger_simulation.ts +โ”‚ โ”œโ”€โ”€ poll_simulation_status.ts +โ”‚ โ”œโ”€โ”€ store_simulation_results.py +โ”‚ โ”œโ”€โ”€ get_latest_performance.py +โ”‚ โ”œโ”€โ”€ daily_simulation_workflow.yaml +โ”‚ โ””โ”€โ”€ dashboard.json +โ”‚ +โ”œโ”€โ”€ agent/ +โ”‚ โ””โ”€โ”€ base_agent/ +โ”‚ โ””โ”€โ”€ base_agent.py # NO CHANGES NEEDED +โ”‚ +โ”œโ”€โ”€ agent_tools/ +โ”‚ โ””โ”€โ”€ ... 
(existing MCP tools) +โ”‚ +โ”œโ”€โ”€ data/ +โ”‚ โ”œโ”€โ”€ jobs.db # SQLite database (created automatically) +โ”‚ โ”œโ”€โ”€ runtime_env*.json # Runtime configs (temporary) +โ”‚ โ”œโ”€โ”€ agent_data/ # Existing position/log data +โ”‚ โ””โ”€โ”€ merged.jsonl # Existing price data +โ”‚ +โ”œโ”€โ”€ Dockerfile # Updated for API mode +โ”œโ”€โ”€ docker-compose.yml # Updated service definition +โ”œโ”€โ”€ docker-entrypoint-api.sh # New API entrypoint +โ”œโ”€โ”€ requirements-api.txt # FastAPI dependencies +โ”œโ”€โ”€ .env # Environment configuration +โ””โ”€โ”€ main.py # Existing (used by worker) +``` + +--- + +## Part 5: Implementation Checklist + +### Phase 1: API Foundation (Days 1-2) +- [ ] Create `api/` directory structure +- [ ] Implement `api/models.py` with Pydantic models +- [ ] Implement `api/database.py` with SQLite utilities +- [ ] Implement `api/job_manager.py` with job CRUD operations +- [ ] Write unit tests for job_manager +- [ ] Test database operations manually + +### Phase 2: Worker & Executor (Days 3-4) +- [ ] Implement `api/runtime_manager.py` +- [ ] Implement `api/executor.py` for single model-day execution +- [ ] Implement `api/worker.py` for job orchestration +- [ ] Test worker with mock agent +- [ ] Test runtime config isolation + +### Phase 3: FastAPI Endpoints (Days 5-6) +- [ ] Implement `api/main.py` with all endpoints +- [ ] Implement `/simulate/trigger` with background tasks +- [ ] Implement `/simulate/status/{job_id}` +- [ ] Implement `/simulate/current` +- [ ] Implement `/results` with detail levels +- [ ] Implement `/health` with MCP checks +- [ ] Test all endpoints with Postman/curl + +### Phase 4: Docker Integration (Day 7) +- [ ] Update `Dockerfile` +- [ ] Create `docker-entrypoint-api.sh` +- [ ] Create `requirements-api.txt` +- [ ] Update `docker-compose.yml` +- [ ] Test Docker build +- [ ] Test container startup and health checks +- [ ] Test end-to-end simulation via API in Docker + +### Phase 5: Windmill Integration (Days 8-9) +- [ ] Create 
Windmill scripts (trigger, poll, store) +- [ ] Test scripts locally against Docker API +- [ ] Deploy scripts to Windmill instance +- [ ] Create Windmill workflow +- [ ] Test workflow end-to-end +- [ ] Create Windmill dashboard +- [ ] Document Windmill setup process + +### Phase 6: Testing & Documentation (Day 10) +- [ ] Integration tests for complete workflow +- [ ] Load testing (multiple concurrent requests) +- [ ] Error scenario testing (MCP down, API timeout) +- [ ] Update README.md with API usage +- [ ] Create API documentation (Swagger/OpenAPI) +- [ ] Create deployment guide +- [ ] Create troubleshooting guide + +--- + +## Summary + +This comprehensive specification covers: + +1. **BaseAgent Refactoring:** Minimal changes needed (existing code compatible) +2. **Docker Configuration:** API service mode with health checks and proper entrypoint +3. **Windmill Integration:** Complete workflow automation with TypeScript/Python scripts +4. **File Structure:** Clear organization of new API components +5. **Implementation Checklist:** Step-by-step plan for 10-day implementation + +**Total estimated implementation time:** 10 working days for MVP + +**Next Step:** Review all specifications (api-specification.md, job-manager-specification.md, worker-specification.md, and this document) and approve before beginning implementation. diff --git a/docs/job-manager-specification.md b/docs/job-manager-specification.md new file mode 100644 index 0000000..f1f846d --- /dev/null +++ b/docs/job-manager-specification.md @@ -0,0 +1,963 @@ +# Job Manager & Database Specification + +## 1. Overview + +The Job Manager is responsible for: +1. **Job lifecycle management** - Creating, tracking, updating job status +2. **Database operations** - SQLite CRUD operations for jobs and job_details +3. **Concurrency control** - Ensuring only one simulation runs at a time +4. **State persistence** - Maintaining job state across API restarts + +--- + +## 2. 
Database Schema + +### 2.1 SQLite Database Location + +``` +data/jobs.db +``` + +**Rationale:** Co-located with simulation data for easy volume mounting + +### 2.2 Table: jobs + +**Purpose:** Track high-level job metadata and status + +```sql +CREATE TABLE IF NOT EXISTS jobs ( + job_id TEXT PRIMARY KEY, + config_path TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')), + date_range TEXT NOT NULL, -- JSON array: ["2025-01-16", "2025-01-17"] + models TEXT NOT NULL, -- JSON array: ["claude-3.7-sonnet", "gpt-5"] + created_at TEXT NOT NULL, -- ISO 8601: "2025-01-20T14:30:00Z" + started_at TEXT, -- When first model-day started + completed_at TEXT, -- When last model-day finished + total_duration_seconds REAL, + error TEXT -- Top-level error message if job failed +); + +-- Indexes for performance +CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status); +CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC); +``` + +**Field Details:** +- `job_id`: UUID v4 (e.g., `550e8400-e29b-41d4-a716-446655440000`) +- `status`: Current job state + - `pending`: Job created, not started yet + - `running`: At least one model-day is executing + - `completed`: All model-days succeeded + - `partial`: Some model-days succeeded, some failed + - `failed`: All model-days failed (rare edge case) +- `date_range`: JSON string for easy querying +- `models`: JSON string of enabled model signatures + +### 2.3 Table: job_details + +**Purpose:** Track individual model-day execution status + +```sql +CREATE TABLE IF NOT EXISTS job_details ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, -- "2025-01-16" + model TEXT NOT NULL, -- "gpt-5" + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')), + started_at TEXT, + completed_at TEXT, + duration_seconds REAL, + error TEXT, -- Error message if this model-day failed + FOREIGN KEY (job_id) REFERENCES jobs(job_id) 
ON DELETE CASCADE +); + +-- Indexes +CREATE INDEX IF NOT EXISTS idx_job_details_job_id ON job_details(job_id); +CREATE INDEX IF NOT EXISTS idx_job_details_status ON job_details(status); +CREATE UNIQUE INDEX IF NOT EXISTS idx_job_details_unique ON job_details(job_id, date, model); +``` + +**Field Details:** +- Each row represents one model-day (e.g., `gpt-5` on `2025-01-16`) +- `UNIQUE INDEX` prevents duplicate execution entries +- `ON DELETE CASCADE` ensures orphaned records are cleaned up + +### 2.4 Example Data + +**jobs table:** +``` +job_id | config_path | status | date_range | models | created_at | started_at | completed_at | total_duration_seconds +--------------------------------------|--------------------------|-----------|-----------------------------------|---------------------------------|----------------------|----------------------|----------------------|---------------------- +550e8400-e29b-41d4-a716-446655440000 | configs/default_config.json | completed | ["2025-01-16","2025-01-17"] | ["gpt-5","claude-3.7-sonnet"] | 2025-01-20T14:25:00Z | 2025-01-20T14:25:10Z | 2025-01-20T14:29:45Z | 275.3 +``` + +**job_details table:** +``` +id | job_id | date | model | status | started_at | completed_at | duration_seconds | error +---|--------------------------------------|------------|--------------------|-----------|----------------------|----------------------|------------------|------ +1 | 550e8400-e29b-41d4-a716-446655440000 | 2025-01-16 | gpt-5 | completed | 2025-01-20T14:25:10Z | 2025-01-20T14:25:48Z | 38.2 | NULL +2 | 550e8400-e29b-41d4-a716-446655440000 | 2025-01-16 | claude-3.7-sonnet | completed | 2025-01-20T14:25:10Z | 2025-01-20T14:25:55Z | 45.1 | NULL +3 | 550e8400-e29b-41d4-a716-446655440000 | 2025-01-17 | gpt-5 | completed | 2025-01-20T14:25:56Z | 2025-01-20T14:26:36Z | 40.0 | NULL +4 | 550e8400-e29b-41d4-a716-446655440000 | 2025-01-17 | claude-3.7-sonnet | completed | 2025-01-20T14:25:56Z | 2025-01-20T14:26:42Z | 46.5 | NULL +``` + +--- + +## 3. 
Job Manager Class + +### 3.1 File Structure + +``` +api/ +โ”œโ”€โ”€ job_manager.py # Core JobManager class +โ”œโ”€โ”€ database.py # SQLite connection and utilities +โ””โ”€โ”€ models.py # Pydantic models +``` + +### 3.2 JobManager Interface + +```python +# api/job_manager.py + +from datetime import datetime +from typing import Optional, List, Dict, Tuple +import uuid +import json +from api.database import get_db_connection + +class JobManager: + """Manages simulation job lifecycle and database operations""" + + def __init__(self, db_path: str = "data/jobs.db"): + self.db_path = db_path + self._initialize_database() + + def _initialize_database(self) -> None: + """Create tables if they don't exist""" + conn = get_db_connection(self.db_path) + # Execute CREATE TABLE statements from section 2.2 and 2.3 + conn.close() + + # ========== Job Creation ========== + + def create_job( + self, + config_path: str, + date_range: List[str], + models: List[str] + ) -> str: + """ + Create a new simulation job. + + Args: + config_path: Path to config file + date_range: List of trading dates to simulate + models: List of model signatures to run + + Returns: + job_id: UUID of created job + + Raises: + ValueError: If another job is already running + """ + # 1. Check if any jobs are currently running + if not self.can_start_new_job(): + raise ValueError("Another simulation job is already running") + + # 2. Generate job ID + job_id = str(uuid.uuid4()) + + # 3. Create job record + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + INSERT INTO jobs ( + job_id, config_path, status, date_range, models, created_at + ) VALUES (?, ?, ?, ?, ?, ?) + """, ( + job_id, + config_path, + "pending", + json.dumps(date_range), + json.dumps(models), + datetime.utcnow().isoformat() + "Z" + )) + + # 4. 
Create job_details records for each model-day + for date in date_range: + for model in models: + cursor.execute(""" + INSERT INTO job_details ( + job_id, date, model, status + ) VALUES (?, ?, ?, ?) + """, (job_id, date, model, "pending")) + + conn.commit() + conn.close() + + return job_id + + # ========== Job Retrieval ========== + + def get_job(self, job_id: str) -> Optional[Dict]: + """ + Get job metadata by ID. + + Returns: + Job dict with keys: job_id, config_path, status, date_range (list), + models (list), created_at, started_at, completed_at, total_duration_seconds + + Returns None if job not found. + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute("SELECT * FROM jobs WHERE job_id = ?", (job_id,)) + row = cursor.fetchone() + conn.close() + + if row is None: + return None + + return { + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "completed_at": row[7], + "total_duration_seconds": row[8], + "error": row[9] + } + + def get_current_job(self) -> Optional[Dict]: + """Get most recent job (for /simulate/current endpoint)""" + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + SELECT * FROM jobs + ORDER BY created_at DESC + LIMIT 1 + """) + row = cursor.fetchone() + conn.close() + + if row is None: + return None + + return self._row_to_job_dict(row) + + def get_running_jobs(self) -> List[Dict]: + """Get all running or pending jobs""" + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + SELECT * FROM jobs + WHERE status IN ('pending', 'running') + ORDER BY created_at DESC + """) + rows = cursor.fetchall() + conn.close() + + return [self._row_to_job_dict(row) for row in rows] + + # ========== Job Status Updates ========== + + def update_job_status( + self, + job_id: str, + status: str, + error: Optional[str] = None + ) 
-> None: + """Update job status (pending โ†’ running โ†’ completed/partial/failed)""" + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + updates = {"status": status} + + if status == "running" and self.get_job(job_id)["status"] == "pending": + updates["started_at"] = datetime.utcnow().isoformat() + "Z" + + if status in ("completed", "partial", "failed"): + updates["completed_at"] = datetime.utcnow().isoformat() + "Z" + # Calculate total duration + job = self.get_job(job_id) + if job["started_at"]: + started = datetime.fromisoformat(job["started_at"].replace("Z", "")) + completed = datetime.utcnow() + updates["total_duration_seconds"] = (completed - started).total_seconds() + + if error: + updates["error"] = error + + # Build dynamic UPDATE query + set_clause = ", ".join([f"{k} = ?" for k in updates.keys()]) + values = list(updates.values()) + [job_id] + + cursor.execute(f""" + UPDATE jobs + SET {set_clause} + WHERE job_id = ? + """, values) + + conn.commit() + conn.close() + + def update_job_detail_status( + self, + job_id: str, + date: str, + model: str, + status: str, + error: Optional[str] = None + ) -> None: + """Update individual model-day status""" + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + updates = {"status": status} + + # Get current detail status to determine if this is a status transition + cursor.execute(""" + SELECT status, started_at FROM job_details + WHERE job_id = ? AND date = ? AND model = ? 
+ """, (job_id, date, model)) + row = cursor.fetchone() + + if row: + current_status = row[0] + + if status == "running" and current_status == "pending": + updates["started_at"] = datetime.utcnow().isoformat() + "Z" + + if status in ("completed", "failed"): + updates["completed_at"] = datetime.utcnow().isoformat() + "Z" + # Calculate duration if started_at exists + if row[1]: # started_at + started = datetime.fromisoformat(row[1].replace("Z", "")) + completed = datetime.utcnow() + updates["duration_seconds"] = (completed - started).total_seconds() + + if error: + updates["error"] = error + + # Build UPDATE query + set_clause = ", ".join([f"{k} = ?" for k in updates.keys()]) + values = list(updates.values()) + [job_id, date, model] + + cursor.execute(f""" + UPDATE job_details + SET {set_clause} + WHERE job_id = ? AND date = ? AND model = ? + """, values) + + conn.commit() + conn.close() + + # After updating detail, check if overall job status needs update + self._update_job_status_from_details(job_id) + + def _update_job_status_from_details(self, job_id: str) -> None: + """ + Recalculate job status based on job_details statuses. + + Logic: + - If any detail is 'running' โ†’ job is 'running' + - If all details are 'completed' โ†’ job is 'completed' + - If some details are 'completed' and some 'failed' โ†’ job is 'partial' + - If all details are 'failed' โ†’ job is 'failed' + - If all details are 'pending' โ†’ job is 'pending' + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + SELECT status, COUNT(*) + FROM job_details + WHERE job_id = ? 
+ GROUP BY status + """, (job_id,)) + + status_counts = {row[0]: row[1] for row in cursor.fetchall()} + conn.close() + + # Determine overall job status + if status_counts.get("running", 0) > 0: + new_status = "running" + elif status_counts.get("pending", 0) > 0: + # Some details still pending, job is either pending or running + current_job = self.get_job(job_id) + new_status = current_job["status"] # Keep current status + elif status_counts.get("failed", 0) > 0 and status_counts.get("completed", 0) > 0: + new_status = "partial" + elif status_counts.get("failed", 0) > 0: + new_status = "failed" + else: + new_status = "completed" + + self.update_job_status(job_id, new_status) + + # ========== Job Progress ========== + + def get_job_progress(self, job_id: str) -> Dict: + """ + Get detailed progress for a job. + + Returns: + { + "total_model_days": int, + "completed": int, + "failed": int, + "current": {"date": str, "model": str} | None, + "details": [ + {"date": str, "model": str, "status": str, "duration_seconds": float | None, "error": str | None}, + ... + ] + } + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + # Get all details for this job + cursor.execute(""" + SELECT date, model, status, started_at, completed_at, duration_seconds, error + FROM job_details + WHERE job_id = ? 
+ ORDER BY date ASC, model ASC + """, (job_id,)) + + rows = cursor.fetchall() + conn.close() + + if not rows: + return { + "total_model_days": 0, + "completed": 0, + "failed": 0, + "current": None, + "details": [] + } + + total = len(rows) + completed = sum(1 for row in rows if row[2] == "completed") + failed = sum(1 for row in rows if row[2] == "failed") + + # Find currently running model-day + current = None + for row in rows: + if row[2] == "running": + current = {"date": row[0], "model": row[1]} + break + + # Build details list + details = [] + for row in rows: + details.append({ + "date": row[0], + "model": row[1], + "status": row[2], + "started_at": row[3], + "completed_at": row[4], + "duration_seconds": row[5], + "error": row[6] + }) + + return { + "total_model_days": total, + "completed": completed, + "failed": failed, + "current": current, + "details": details + } + + # ========== Concurrency Control ========== + + def can_start_new_job(self) -> bool: + """Check if a new job can be started (max 1 concurrent job)""" + running_jobs = self.get_running_jobs() + return len(running_jobs) == 0 + + def find_job_by_date_range(self, date_range: List[str]) -> Optional[Dict]: + """Find job with exact matching date range (for idempotency check)""" + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + # Query recent jobs (last 24 hours) + cursor.execute(""" + SELECT * FROM jobs + WHERE created_at > datetime('now', '-1 day') + ORDER BY created_at DESC + """) + + rows = cursor.fetchall() + conn.close() + + # Check each job's date_range + target_range = set(date_range) + for row in rows: + job_range = set(json.loads(row[3])) # date_range column + if job_range == target_range: + return self._row_to_job_dict(row) + + return None + + # ========== Utility Methods ========== + + def _row_to_job_dict(self, row: tuple) -> Dict: + """Convert DB row to job dictionary""" + return { + "job_id": row[0], + "config_path": row[1], + "status": row[2], + "date_range": 
json.loads(row[3]), + "models": json.loads(row[4]), + "created_at": row[5], + "started_at": row[6], + "completed_at": row[7], + "total_duration_seconds": row[8], + "error": row[9] + } + + def cleanup_old_jobs(self, days: int = 30) -> int: + """ + Delete jobs older than specified days (cleanup maintenance). + + Returns: + Number of jobs deleted + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + DELETE FROM jobs + WHERE created_at < datetime('now', '-' || ? || ' days') + """, (days,)) + + deleted_count = cursor.rowcount + conn.commit() + conn.close() + + return deleted_count +``` + +--- + +## 4. Database Utility Module + +```python +# api/database.py + +import sqlite3 +from typing import Optional +import os + +def get_db_connection(db_path: str = "data/jobs.db") -> sqlite3.Connection: + """ + Get SQLite database connection. + + Ensures: + - Database directory exists + - Foreign keys are enabled + - Row factory returns dict-like objects + """ + # Ensure data directory exists + os.makedirs(os.path.dirname(db_path), exist_ok=True) + + conn = sqlite3.connect(db_path, check_same_thread=False) + conn.execute("PRAGMA foreign_keys = ON") # Enable FK constraints + conn.row_factory = sqlite3.Row # Return rows as dict-like objects + + return conn + +def initialize_database(db_path: str = "data/jobs.db") -> None: + """Create database tables if they don't exist""" + conn = get_db_connection(db_path) + cursor = conn.cursor() + + # Create jobs table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS jobs ( + job_id TEXT PRIMARY KEY, + config_path TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')), + date_range TEXT NOT NULL, + models TEXT NOT NULL, + created_at TEXT NOT NULL, + started_at TEXT, + completed_at TEXT, + total_duration_seconds REAL, + error TEXT + ) + """) + + # Create indexes + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status) + """) 
+ cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC) + """) + + # Create job_details table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS job_details ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + date TEXT NOT NULL, + model TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')), + started_at TEXT, + completed_at TEXT, + duration_seconds REAL, + error TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE + ) + """) + + # Create indexes + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_job_details_job_id ON job_details(job_id) + """) + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_job_details_status ON job_details(status) + """) + cursor.execute(""" + CREATE UNIQUE INDEX IF NOT EXISTS idx_job_details_unique + ON job_details(job_id, date, model) + """) + + conn.commit() + conn.close() +``` + +--- + +## 5. State Transitions + +### 5.1 Job Status State Machine + +``` +pending โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€> running โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€> completed + โ”‚ โ”‚ + โ”‚ โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€> partial + โ”‚ โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€> failed +``` + +**Transition Logic:** +- `pending โ†’ running`: When first model-day starts executing +- `running โ†’ completed`: When all model-days complete successfully +- `running โ†’ partial`: When some model-days succeed, some fail +- `running โ†’ failed`: When all model-days fail (rare) + +### 5.2 Job Detail Status State Machine + +``` +pending โ”€โ”€โ”€โ”€โ”€โ”€> running โ”€โ”€โ”€โ”€โ”€โ”€> completed + โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€> failed +``` + +**Transition Logic:** +- `pending โ†’ running`: When worker starts executing that model-day +- `running โ†’ completed`: When `agent.run_trading_session()` succeeds +- `running โ†’ failed`: When `agent.run_trading_session()` raises exception after retries + +--- + +## 6. 
Concurrency Scenarios
+
+### 6.1 Scenario: Duplicate Trigger Requests
+
+**Timeline:**
+1. Request A: POST /simulate/trigger → Job created with date_range=[2025-01-16, 2025-01-17]
+2. Request B (5 seconds later): POST /simulate/trigger → Same date range
+
+**Expected Behavior:**
+- Request A: Returns `{"job_id": "abc123", "status": "accepted"}`
+- Request B: `find_job_by_date_range()` finds Job abc123
+- Request B: Returns `{"job_id": "abc123", "status": "running", ...}` (same job)
+
+**Code:**
+```python
+# In /simulate/trigger endpoint
+existing_job = job_manager.find_job_by_date_range(date_range)
+if existing_job:
+    # Return existing job instead of creating duplicate
+    return existing_job
+```
+
+### 6.2 Scenario: Concurrent Jobs with Different Dates
+
+**Timeline:**
+1. Job A running: date_range=[2025-01-01 to 2025-01-10] (started 5 min ago)
+2. Request: POST /simulate/trigger with date_range=[2025-01-11 to 2025-01-15]
+
+**Expected Behavior:**
+- `can_start_new_job()` returns False (Job A is still running)
+- Request returns 409 Conflict with details of Job A
+
+### 6.3 Scenario: Job Cleanup on API Restart
+
+**Problem:** API crashes while job is running. On restart, job stuck in "running" state.
+
+**Solution:** On API startup, detect stale jobs and mark as failed:
+```python
+# In api/main.py startup event
+@app.on_event("startup")
+async def startup_event():
+    job_manager = JobManager()
+
+    # Find jobs stuck in 'running' or 'pending' state
+    stale_jobs = job_manager.get_running_jobs()
+
+    for job in stale_jobs:
+        # Mark as failed with explanation
+        job_manager.update_job_status(
+            job["job_id"],
+            "failed",
+            error="API restarted while job was running"
+        )
+```
+
+---
+
+## 7. 
Testing Strategy + +### 7.1 Unit Tests + +```python +# tests/test_job_manager.py + +import pytest +from api.job_manager import JobManager +import tempfile +import os + +@pytest.fixture +def job_manager(): + # Use temporary database for tests + temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") + temp_db.close() + + jm = JobManager(db_path=temp_db.name) + yield jm + + # Cleanup + os.unlink(temp_db.name) + +def test_create_job(job_manager): + job_id = job_manager.create_job( + config_path="configs/test.json", + date_range=["2025-01-16", "2025-01-17"], + models=["gpt-5", "claude-3.7-sonnet"] + ) + + assert job_id is not None + job = job_manager.get_job(job_id) + assert job["status"] == "pending" + assert job["date_range"] == ["2025-01-16", "2025-01-17"] + + # Check job_details created + progress = job_manager.get_job_progress(job_id) + assert progress["total_model_days"] == 4 # 2 dates ร— 2 models + +def test_concurrent_job_blocked(job_manager): + # Create first job + job1_id = job_manager.create_job("configs/test.json", ["2025-01-16"], ["gpt-5"]) + + # Try to create second job while first is pending + with pytest.raises(ValueError, match="Another simulation job is already running"): + job_manager.create_job("configs/test.json", ["2025-01-17"], ["gpt-5"]) + + # Mark first job as completed + job_manager.update_job_status(job1_id, "completed") + + # Now second job should be allowed + job2_id = job_manager.create_job("configs/test.json", ["2025-01-17"], ["gpt-5"]) + assert job2_id is not None + +def test_job_status_transitions(job_manager): + job_id = job_manager.create_job("configs/test.json", ["2025-01-16"], ["gpt-5"]) + + # Update job detail to running + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + + # Job should now be 'running' + job = job_manager.get_job(job_id) + assert job["status"] == "running" + assert job["started_at"] is not None + + # Complete the detail + job_manager.update_job_detail_status(job_id, 
"2025-01-16", "gpt-5", "completed") + + # Job should now be 'completed' + job = job_manager.get_job(job_id) + assert job["status"] == "completed" + assert job["completed_at"] is not None + +def test_partial_job_status(job_manager): + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5", "claude-3.7-sonnet"] + ) + + # One model succeeds + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "completed") + + # One model fails + job_manager.update_job_detail_status(job_id, "2025-01-16", "claude-3.7-sonnet", "running") + job_manager.update_job_detail_status( + job_id, "2025-01-16", "claude-3.7-sonnet", "failed", + error="API timeout" + ) + + # Job should be 'partial' + job = job_manager.get_job(job_id) + assert job["status"] == "partial" + + progress = job_manager.get_job_progress(job_id) + assert progress["completed"] == 1 + assert progress["failed"] == 1 +``` + +--- + +## 8. Performance Considerations + +### 8.1 Database Indexing + +- `idx_jobs_status`: Fast filtering for running jobs +- `idx_jobs_created_at DESC`: Fast retrieval of most recent job +- `idx_job_details_unique`: Prevent duplicate model-day entries + +### 8.2 Connection Pooling + +For MVP, using `sqlite3.connect()` per operation is acceptable (low concurrency). + +For higher concurrency (future), consider: +- SQLAlchemy ORM with connection pooling +- PostgreSQL for production deployments + +### 8.3 Query Optimization + +**Avoid N+1 queries:** +```python +# BAD: Separate query for each job's progress +for job in jobs: + progress = job_manager.get_job_progress(job["job_id"]) + +# GOOD: Join jobs and job_details in single query +SELECT + jobs.*, + COUNT(job_details.id) as total, + SUM(CASE WHEN job_details.status = 'completed' THEN 1 ELSE 0 END) as completed +FROM jobs +LEFT JOIN job_details ON jobs.job_id = job_details.job_id +GROUP BY jobs.job_id +``` + +--- + +## 9. 
Error Handling + +### 9.1 Database Errors + +**Scenario:** SQLite database is locked or corrupted + +**Handling:** +```python +try: + job_id = job_manager.create_job(...) +except sqlite3.OperationalError as e: + # Database locked - retry with exponential backoff + logger.error(f"Database error: {e}") + raise HTTPException(status_code=503, detail="Database temporarily unavailable") +except sqlite3.IntegrityError as e: + # Constraint violation (e.g., duplicate job_id) + logger.error(f"Integrity error: {e}") + raise HTTPException(status_code=400, detail="Invalid job data") +``` + +### 9.2 Foreign Key Violations + +**Scenario:** Attempt to create job_detail for non-existent job + +**Prevention:** +- Always create job record before job_details records +- Use transactions to ensure atomicity + +```python +def create_job(self, ...): + conn = get_db_connection(self.db_path) + try: + cursor = conn.cursor() + + # Insert job + cursor.execute("INSERT INTO jobs ...") + + # Insert job_details + for date in date_range: + for model in models: + cursor.execute("INSERT INTO job_details ...") + + conn.commit() # Atomic commit + except Exception as e: + conn.rollback() # Rollback on any error + raise + finally: + conn.close() +``` + +--- + +## 10. Migration Strategy + +### 10.1 Schema Versioning + +For future schema changes, use migration scripts: + +``` +data/ +โ””โ”€โ”€ migrations/ + โ”œโ”€โ”€ 001_initial_schema.sql + โ”œโ”€โ”€ 002_add_priority_column.sql + โ””โ”€โ”€ ... +``` + +Track applied migrations in database: +```sql +CREATE TABLE IF NOT EXISTS schema_migrations ( + version INTEGER PRIMARY KEY, + applied_at TEXT NOT NULL +); +``` + +### 10.2 Backward Compatibility + +When adding columns: +- Use `ALTER TABLE ADD COLUMN ... DEFAULT ...` for backward compatibility +- Never remove columns (deprecate instead) +- Version API responses to handle schema changes + +--- + +## Summary + +The Job Manager provides: +1. **Robust job tracking** with SQLite persistence +2. 
**Concurrency control** ensuring single-job execution
+3. **Granular progress monitoring** at model-day level
+4. **Flexible status handling** (completed/partial/failed)
+5. **Idempotency** for duplicate trigger requests
+
+Next specification: **Background Worker Architecture**
diff --git a/docs/testing-specification.md b/docs/testing-specification.md
new file mode 100644
index 0000000..7fab38d
--- /dev/null
+++ b/docs/testing-specification.md
@@ -0,0 +1,1155 @@
+# Comprehensive Testing Suite Specification
+
+## 1. Overview
+
+This document defines the complete testing strategy, test suite structure, coverage requirements, and quality thresholds for the AI-Trader API service.
+
+**Testing Philosophy:**
+- **Test-Driven Development (TDD)** for critical paths
+- **High coverage** (≥85%) for production code
+- **Fast feedback** - unit tests run in <10 seconds
+- **Realistic integration tests** with test database
+- **Performance benchmarks** to catch regressions
+- **Security testing** for API vulnerabilities
+
+---
+
+## 2. 
Testing Thresholds & Requirements
+
+### 2.1 Code Coverage
+
+| Component | Minimum Coverage | Target Coverage | Notes |
+|-----------|-----------------|-----------------|-------|
+| **api/job_manager.py** | 90% | 95% | Critical - job lifecycle |
+| **api/worker.py** | 85% | 90% | Core execution logic |
+| **api/executor.py** | 85% | 90% | Model-day execution |
+| **api/results_service.py** | 90% | 95% | Data retrieval |
+| **api/database.py** | 95% | 100% | Database utilities |
+| **api/runtime_manager.py** | 85% | 90% | Config isolation |
+| **api/main.py** | 80% | 85% | API endpoints |
+| **Overall** | **85%** | **90%** | **Project minimum** |
+
+**Enforcement:**
+- CI/CD pipeline **fails** if coverage drops below minimum
+- Coverage report generated on every commit
+- Uncovered lines flagged in PR reviews
+
+---
+
+### 2.2 Performance Thresholds
+
+| Metric | Threshold | Test Method |
+|--------|-----------|-------------|
+| **Unit test suite** | < 10 seconds | `pytest tests/unit/` |
+| **Integration test suite** | < 60 seconds | `pytest tests/integration/` |
+| **API endpoint `/simulate/trigger`** | < 500ms | Load testing |
+| **API endpoint `/simulate/status`** | < 100ms | Load testing |
+| **API endpoint `/results?detail=minimal`** | < 200ms | Load testing |
+| **API endpoint `/results?detail=full`** | < 1 second | Load testing |
+| **Database query (get_job)** | < 50ms | Benchmark tests |
+| **Database query (get_job_progress)** | < 100ms | Benchmark tests |
+| **Simulation (single model-day)** | 30-60s | Acceptance test |
+
+**Enforcement:**
+- Performance tests run nightly
+- Alerts triggered if thresholds exceeded
+- Benchmark results tracked over time
+
+---
+
+### 2.3 Quality Gates
+
+**All PRs must pass:**
+1. ✅ All tests passing (unit + integration)
+2. ✅ Code coverage ≥ 85%
+3. ✅ No critical security vulnerabilities (Bandit scan)
+4. ✅ Linting passes (Ruff or Flake8)
+5. ✅ Type checking passes (mypy with strict mode)
+6. 
✅ No performance regressions (±10% tolerance)
+
+**Release checklist:**
+1. ✅ All quality gates pass
+2. ✅ End-to-end tests pass in Docker
+3. ✅ Load testing passes (100 concurrent requests)
+4. ✅ Security scan passes (OWASP ZAP)
+5. ✅ Manual smoke tests complete
+
+---
+
+## 3. Test Suite Structure
+
+```
+tests/
+├── __init__.py
+├── conftest.py                  # Shared pytest fixtures
+│
+├── unit/                        # Fast, isolated tests
+│   ├── __init__.py
+│   ├── test_job_manager.py      # JobManager CRUD operations
+│   ├── test_database.py         # Database utilities
+│   ├── test_runtime_manager.py  # Config isolation
+│   ├── test_results_service.py  # Results queries
+│   └── test_models.py           # Pydantic model validation
+│
+├── integration/                 # Tests with real dependencies
+│   ├── __init__.py
+│   ├── test_api_endpoints.py    # FastAPI endpoint tests
+│   ├── test_worker.py           # Job execution workflow
+│   ├── test_executor.py         # Model-day execution
+│   └── test_end_to_end.py       # Complete simulation flow
+│
+├── performance/                 # Benchmark and load tests
+│   ├── __init__.py
+│   ├── test_database_benchmarks.py
+│   ├── test_api_load.py         # Locust or pytest-benchmark
+│   └── test_simulation_timing.py
+│
+├── security/                    # Security tests
+│   ├── __init__.py
+│   ├── test_api_security.py     # Input validation, injection
+│   └── test_auth.py             # Future: API key validation
+│
+└── e2e/                         # End-to-end with Docker
+    ├── __init__.py
+    └── test_docker_workflow.py  # Full Docker compose scenario
+```
+
+---
+
+## 4. 
Unit Tests + +### 4.1 test_job_manager.py + +```python +# tests/unit/test_job_manager.py + +import pytest +import tempfile +import os +from datetime import datetime, timedelta +from api.job_manager import JobManager + +@pytest.fixture +def job_manager(): + """Create JobManager with temporary database""" + temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") + temp_db.close() + + jm = JobManager(db_path=temp_db.name) + yield jm + + # Cleanup + os.unlink(temp_db.name) + + +class TestJobCreation: + """Test job creation and validation""" + + def test_create_job_success(self, job_manager): + """Should create job with pending status""" + job_id = job_manager.create_job( + config_path="configs/test.json", + date_range=["2025-01-16", "2025-01-17"], + models=["gpt-5", "claude-3.7-sonnet"] + ) + + assert job_id is not None + job = job_manager.get_job(job_id) + assert job["status"] == "pending" + assert job["date_range"] == ["2025-01-16", "2025-01-17"] + assert job["models"] == ["gpt-5", "claude-3.7-sonnet"] + assert job["created_at"] is not None + + def test_create_job_with_job_details(self, job_manager): + """Should create job_details for each model-day""" + job_id = job_manager.create_job( + config_path="configs/test.json", + date_range=["2025-01-16", "2025-01-17"], + models=["gpt-5"] + ) + + progress = job_manager.get_job_progress(job_id) + assert progress["total_model_days"] == 2 # 2 dates ร— 1 model + assert progress["completed"] == 0 + assert progress["failed"] == 0 + + def test_create_job_blocks_concurrent(self, job_manager): + """Should prevent creating second job while first is pending""" + job1_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + with pytest.raises(ValueError, match="Another simulation job is already running"): + job_manager.create_job( + "configs/test.json", + ["2025-01-17"], + ["gpt-5"] + ) + + def test_create_job_after_completion(self, job_manager): + """Should allow new job after previous 
completes""" + job1_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + job_manager.update_job_status(job1_id, "completed") + + # Now second job should be allowed + job2_id = job_manager.create_job( + "configs/test.json", + ["2025-01-17"], + ["gpt-5"] + ) + assert job2_id is not None + + +class TestJobStatusTransitions: + """Test job status state machine""" + + def test_pending_to_running(self, job_manager): + """Should transition from pending to running""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + # Update detail to running + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + + job = job_manager.get_job(job_id) + assert job["status"] == "running" + assert job["started_at"] is not None + + def test_running_to_completed(self, job_manager): + """Should transition to completed when all details complete""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "completed") + + job = job_manager.get_job(job_id) + assert job["status"] == "completed" + assert job["completed_at"] is not None + assert job["total_duration_seconds"] is not None + + def test_partial_completion(self, job_manager): + """Should mark as partial when some models fail""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5", "claude-3.7-sonnet"] + ) + + # First model succeeds + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "completed") + + # Second model fails + job_manager.update_job_detail_status(job_id, "2025-01-16", "claude-3.7-sonnet", "running") + job_manager.update_job_detail_status( + job_id, "2025-01-16", "claude-3.7-sonnet", "failed", + error="API 
timeout" + ) + + job = job_manager.get_job(job_id) + assert job["status"] == "partial" + + progress = job_manager.get_job_progress(job_id) + assert progress["completed"] == 1 + assert progress["failed"] == 1 + + +class TestJobRetrieval: + """Test job query operations""" + + def test_get_nonexistent_job(self, job_manager): + """Should return None for nonexistent job""" + job = job_manager.get_job("nonexistent-id") + assert job is None + + def test_get_current_job(self, job_manager): + """Should return most recent job""" + job1_id = job_manager.create_job("configs/test.json", ["2025-01-16"], ["gpt-5"]) + job_manager.update_job_status(job1_id, "completed") + + job2_id = job_manager.create_job("configs/test.json", ["2025-01-17"], ["gpt-5"]) + + current = job_manager.get_current_job() + assert current["job_id"] == job2_id + + def test_find_job_by_date_range(self, job_manager): + """Should find existing job with same date range""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16", "2025-01-17"], + ["gpt-5"] + ) + + found = job_manager.find_job_by_date_range(["2025-01-16", "2025-01-17"]) + assert found["job_id"] == job_id + + +class TestJobProgress: + """Test job progress tracking""" + + def test_progress_all_pending(self, job_manager): + """Should show 0 completed when all pending""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16", "2025-01-17"], + ["gpt-5"] + ) + + progress = job_manager.get_job_progress(job_id) + assert progress["total_model_days"] == 2 + assert progress["completed"] == 0 + assert progress["failed"] == 0 + assert progress["current"] is None + + def test_progress_with_running(self, job_manager): + """Should identify currently running model-day""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "running") + + progress = job_manager.get_job_progress(job_id) + assert progress["current"] == 
{"date": "2025-01-16", "model": "gpt-5"} + + def test_progress_details(self, job_manager): + """Should return detailed progress for all model-days""" + job_id = job_manager.create_job( + "configs/test.json", + ["2025-01-16"], + ["gpt-5", "claude-3.7-sonnet"] + ) + + job_manager.update_job_detail_status(job_id, "2025-01-16", "gpt-5", "completed") + + progress = job_manager.get_job_progress(job_id) + assert len(progress["details"]) == 2 + assert progress["details"][0]["model"] == "gpt-5" + assert progress["details"][0]["status"] == "completed" + + +class TestJobCleanup: + """Test maintenance operations""" + + def test_cleanup_old_jobs(self, job_manager): + """Should delete jobs older than threshold""" + # Create old job (manually set created_at) + from api.database import get_db_connection + conn = get_db_connection(job_manager.db_path) + cursor = conn.cursor() + + old_date = (datetime.utcnow() - timedelta(days=35)).isoformat() + "Z" + cursor.execute(""" + INSERT INTO jobs (job_id, config_path, status, date_range, models, created_at) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, ("old-job", "configs/test.json", "completed", '["2025-01-01"]', '["gpt-5"]', old_date)) + conn.commit() + conn.close() + + # Create recent job + recent_id = job_manager.create_job("configs/test.json", ["2025-01-16"], ["gpt-5"]) + + # Cleanup jobs older than 30 days + deleted = job_manager.cleanup_old_jobs(days=30) + + assert deleted["jobs_deleted"] == 1 + assert job_manager.get_job("old-job") is None + assert job_manager.get_job(recent_id) is not None + + +# ========== Coverage Target: 95% for job_manager.py ========== +``` + +--- + +### 4.2 test_results_service.py + +```python +# tests/unit/test_results_service.py + +import pytest +import tempfile +import os +from api.results_service import ResultsService +from api.database import get_db_connection + +@pytest.fixture +def results_service(): + """Create ResultsService with test data""" + temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") + temp_db.close() + + service = ResultsService(db_path=temp_db.name) + + # Populate test data + _populate_test_data(temp_db.name) + + yield service + + os.unlink(temp_db.name) + + +def _populate_test_data(db_path): + """Insert sample positions data""" + from api.database import initialize_database + initialize_database(db_path) + + conn = get_db_connection(db_path) + cursor = conn.cursor() + + # Insert sample job + cursor.execute(""" + INSERT INTO jobs (job_id, config_path, status, date_range, models, created_at) + VALUES (?, ?, ?, ?, ?, ?) + """, ("test-job", "configs/test.json", "completed", '["2025-01-16"]', '["gpt-5"]', "2025-01-16T00:00:00Z")) + + # Insert positions + cursor.execute(""" + INSERT INTO positions ( + job_id, date, model, action_id, action_type, symbol, amount, price, + cash, portfolio_value, daily_profit, daily_return_pct, + cumulative_profit, cumulative_return_pct, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, ("test-job", "2025-01-16", "gpt-5", 1, "buy", "AAPL", 10, 255.88, + 7441.2, 10000.0, 0.0, 0.0, 0.0, 0.0, "2025-01-16T09:30:00Z")) + + position_id = cursor.lastrowid + + # Insert holdings + cursor.execute(""" + INSERT INTO holdings (position_id, symbol, quantity) + VALUES (?, ?, ?) + """, (position_id, "AAPL", 10)) + + conn.commit() + conn.close() + + +class TestGetResults: + """Test results retrieval""" + + def test_get_results_minimal(self, results_service): + """Should return minimal results for date""" + results = results_service.get_results("2025-01-16", model="gpt-5", detail="minimal") + + assert results["date"] == "2025-01-16" + assert len(results["results"]) == 1 + assert results["results"][0]["model"] == "gpt-5" + assert "AAPL" in results["results"][0]["positions"] + assert results["results"][0]["positions"]["AAPL"] == 10 + assert results["results"][0]["positions"]["CASH"] == 7441.2 + + def test_get_results_nonexistent_date(self, results_service): + """Should return empty results for nonexistent date""" + results = results_service.get_results("2099-12-31", model="gpt-5") + assert results["results"] == [] + + def test_get_results_all_models(self, results_service): + """Should return all models when model not specified""" + results = results_service.get_results("2025-01-16") + assert len(results["results"]) >= 1 # At least one model + + +class TestPortfolioTimeseries: + """Test timeseries queries""" + + def test_get_timeseries(self, results_service): + """Should return portfolio values over time""" + timeseries = results_service.get_portfolio_timeseries("gpt-5") + + assert len(timeseries) >= 1 + assert timeseries[0]["date"] == "2025-01-16" + assert "portfolio_value" in timeseries[0] + + def test_get_timeseries_with_date_range(self, results_service): + """Should filter by date range""" + timeseries = results_service.get_portfolio_timeseries( + "gpt-5", + start_date="2025-01-16", + end_date="2025-01-16" + ) + + assert len(timeseries) == 1 + + +class 
TestLeaderboard: + """Test leaderboard generation""" + + def test_get_leaderboard(self, results_service): + """Should rank models by portfolio value""" + leaderboard = results_service.get_leaderboard() + + assert len(leaderboard) >= 1 + assert leaderboard[0]["rank"] == 1 + assert "portfolio_value" in leaderboard[0] + + def test_leaderboard_for_specific_date(self, results_service): + """Should generate leaderboard for specific date""" + leaderboard = results_service.get_leaderboard(date="2025-01-16") + assert len(leaderboard) >= 1 + + +# ========== Coverage Target: 95% for results_service.py ========== +``` + +--- + +## 5. Integration Tests + +### 5.1 test_api_endpoints.py + +```python +# tests/integration/test_api_endpoints.py + +import pytest +from fastapi.testclient import TestClient +from api.main import app +import tempfile +import os + +@pytest.fixture +def client(): + """Create test client with temporary database""" + temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") + temp_db.close() + + # Override database path for testing + os.environ["TEST_DB_PATH"] = temp_db.name + + client = TestClient(app) + yield client + + os.unlink(temp_db.name) + + +class TestTriggerEndpoint: + """Test /simulate/trigger endpoint""" + + def test_trigger_simulation_success(self, client): + """Should accept simulation trigger and return job_id""" + response = client.post("/simulate/trigger", json={ + "config_path": "configs/test.json" + }) + + assert response.status_code == 202 + data = response.json() + assert "job_id" in data + assert data["status"] == "accepted" + assert "date_range" in data + assert "models" in data + + def test_trigger_simulation_already_running(self, client): + """Should return existing job if already running""" + # First request + response1 = client.post("/simulate/trigger", json={ + "config_path": "configs/test.json" + }) + job_id_1 = response1.json()["job_id"] + + # Second request (before first completes) + response2 = 
client.post("/simulate/trigger", json={ + "config_path": "configs/test.json" + }) + + # Should return same job_id + assert response2.status_code in (200, 202) + # job_id_2 = response2.json()["job_id"] + # assert job_id_1 == job_id_2 # TODO: Fix based on actual implementation + + def test_trigger_simulation_invalid_config(self, client): + """Should return 400 for invalid config path""" + response = client.post("/simulate/trigger", json={ + "config_path": "nonexistent.json" + }) + + assert response.status_code == 400 + + +class TestStatusEndpoint: + """Test /simulate/status/{job_id} endpoint""" + + def test_get_status_success(self, client): + """Should return job status""" + # Create job first + trigger_response = client.post("/simulate/trigger", json={ + "config_path": "configs/test.json" + }) + job_id = trigger_response.json()["job_id"] + + # Get status + response = client.get(f"/simulate/status/{job_id}") + + assert response.status_code == 200 + data = response.json() + assert data["job_id"] == job_id + assert data["status"] in ("pending", "running", "completed", "partial", "failed") + assert "progress" in data + + def test_get_status_nonexistent(self, client): + """Should return 404 for nonexistent job""" + response = client.get("/simulate/status/nonexistent-id") + assert response.status_code == 404 + + +class TestResultsEndpoint: + """Test /results endpoint""" + + def test_get_results_success(self, client): + """Should return simulation results""" + # TODO: Populate test data first + response = client.get("/results", params={ + "date": "2025-01-16", + "model": "gpt-5", + "detail": "minimal" + }) + + # May be 404 if no data, or 200 if test data exists + assert response.status_code in (200, 404) + + def test_get_results_invalid_date(self, client): + """Should return 400 for invalid date format""" + response = client.get("/results", params={ + "date": "invalid-date" + }) + + assert response.status_code == 400 + + +class TestHealthEndpoint: + """Test /health 
endpoint""" + + def test_health_check(self, client): + """Should return healthy status""" + response = client.get("/health") + + assert response.status_code in (200, 503) # May be 503 if MCP services not running + data = response.json() + assert "status" in data + assert "services" in data + + +# ========== Coverage Target: 85% for main.py ========== +``` + +--- + +## 6. Performance Tests + +```python +# tests/performance/test_api_load.py + +import pytest +from locust import HttpUser, task, between +import time + +class AITraderAPIUser(HttpUser): + """Simulate API user load""" + wait_time = between(1, 3) # Wait 1-3 seconds between requests + + @task(3) + def get_health(self): + """Most common endpoint""" + self.client.get("/health") + + @task(2) + def get_results(self): + """Fetch results""" + self.client.get("/results?date=2025-01-16&model=gpt-5&detail=minimal") + + @task(1) + def trigger_simulation(self): + """Less common - trigger simulation""" + self.client.post("/simulate/trigger", json={ + "config_path": "configs/test.json" + }) + + +# Run with: locust -f tests/performance/test_api_load.py --host=http://localhost:8080 +``` + +```python +# tests/performance/test_database_benchmarks.py + +import pytest +from api.job_manager import JobManager +import time + +@pytest.mark.benchmark +def test_create_job_performance(benchmark, job_manager): + """Benchmark job creation time""" + result = benchmark( + job_manager.create_job, + "configs/test.json", + ["2025-01-16"], + ["gpt-5"] + ) + + # Should complete in < 50ms + assert benchmark.stats.mean < 0.05 + + +@pytest.mark.benchmark +def test_get_job_performance(benchmark, job_manager): + """Benchmark job retrieval time""" + # Create job first + job_id = job_manager.create_job("configs/test.json", ["2025-01-16"], ["gpt-5"]) + + result = benchmark(job_manager.get_job, job_id) + + # Should complete in < 10ms + assert benchmark.stats.mean < 0.01 + + +# Run with: pytest tests/performance/ --benchmark-only +``` + +--- + +## 7. 
Security Tests + +```python +# tests/security/test_api_security.py + +import pytest +from fastapi.testclient import TestClient +from api.main import app + +client = TestClient(app) + + +class TestInputValidation: + """Test input validation and sanitization""" + + def test_sql_injection_protection(self): + """Should reject SQL injection attempts""" + response = client.get("/results", params={ + "date": "2025-01-16' OR '1'='1", + "model": "gpt-5" + }) + + # Should return 400 (invalid date format), not execute SQL + assert response.status_code == 400 + + def test_path_traversal_protection(self): + """Should reject path traversal attempts""" + response = client.post("/simulate/trigger", json={ + "config_path": "../../etc/passwd" + }) + + # Should reject or return 404 + assert response.status_code in (400, 404) + + def test_xss_protection(self): + """Should sanitize XSS attempts""" + response = client.post("/simulate/trigger", json={ + "config_path": "" + }) + + assert response.status_code in (400, 404) + # Response should not contain unsanitized script + assert "