feat: transform to REST API service with SQLite persistence (v0.3.0)

Major architecture transformation from batch-only to API service with database persistence for Windmill integration. ## REST API Implementation - POST /simulate/trigger - Start simulation jobs - GET /simulate/status/{job_id} - Monitor job progress - GET /results - Query results with filters (job_id, date, model) - GET /health - Service health checks ## Database Layer - SQLite persistence with 6 tables (jobs, job_details, positions, holdings, reasoning_logs, tool_usage) - Foreign key constraints with cascade deletes - Replaces JSONL file storage ## Backend Components - JobManager: Job lifecycle management with concurrency control - RuntimeConfigManager: Thread-safe isolated runtime configs - ModelDayExecutor: Single model-day execution engine - SimulationWorker: Date-sequential, model-parallel orchestration ## Testing - 102 unit and integration tests (85% coverage) - Database: 98% coverage - Job manager: 98% coverage - API endpoints: 81% coverage - Pydantic models: 100% coverage - TDD approach throughout ## Docker Deployment - Dual-mode: API server (persistent) + batch (one-time) - Health checks with 30s interval - Volume persistence for database and logs - Separate entrypoints for each mode ## Validation Tools - scripts/validate_docker_build.sh - Build validation - scripts/test_api_endpoints.sh - Complete API testing - scripts/test_batch_mode.sh - Batch mode validation - DOCKER_API.md - Deployment guide - TESTING_GUIDE.md - Testing procedures ## Configuration - API_PORT environment variable (default: 8080) - Backwards compatible with existing configs - FastAPI, uvicorn, pydantic>=2.0 dependencies Co-Authored-By: AI Assistant <noreply@example.com>
2026-04-01 17:17:24 -04:00 · 2025-10-31 11:47:10 -04:00
parent 5da02b4ba0
commit fb9583b374
45 changed files with 13775 additions and 18 deletions
--- a/api/database.py
+++ b/api/database.py
@@ -0,0 +1,307 @@
+"""
+Database utilities and schema management for AI-Trader API.
+
+This module provides:
+- SQLite connection management
+- Database schema initialization (6 tables)
+- ACID-compliant transaction support
+"""
+
+import sqlite3
+from pathlib import Path
+from typing import Optional
+import os
+
+
+def get_db_connection(db_path: str = "data/jobs.db") -> sqlite3.Connection:
+    """
+    Get SQLite database connection with proper configuration.
+
+    Args:
+        db_path: Path to SQLite database file
+
+    Returns:
+        Configured SQLite connection
+
+    Configuration:
+        - Foreign keys enabled for referential integrity
+        - Row factory for dict-like access
+        - Check same thread disabled for FastAPI async compatibility
+    """
+    # Ensure data directory exists
+    db_path_obj = Path(db_path)
+    db_path_obj.parent.mkdir(parents=True, exist_ok=True)
+
+    conn = sqlite3.connect(db_path, check_same_thread=False)
+    conn.execute("PRAGMA foreign_keys = ON")
+    conn.row_factory = sqlite3.Row
+
+    return conn
+
+
+def initialize_database(db_path: str = "data/jobs.db") -> None:
+    """
+    Create all database tables with enhanced schema.
+
+    Tables created:
+        1. jobs - High-level job metadata and status
+        2. job_details - Per model-day execution tracking
+        3. positions - Trading positions and P&L metrics
+        4. holdings - Portfolio holdings per position
+        5. reasoning_logs - AI decision logs (optional, for detail=full)
+        6. tool_usage - Tool usage statistics
+
+    Args:
+        db_path: Path to SQLite database file
+    """
+    conn = get_db_connection(db_path)
+    cursor = conn.cursor()
+
+    # Table 1: Jobs - Job metadata and lifecycle
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS jobs (
+            job_id TEXT PRIMARY KEY,
+            config_path TEXT NOT NULL,
+            status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'partial', 'failed')),
+            date_range TEXT NOT NULL,
+            models TEXT NOT NULL,
+            created_at TEXT NOT NULL,
+            started_at TEXT,
+            updated_at TEXT,
+            completed_at TEXT,
+            total_duration_seconds REAL,
+            error TEXT
+        )
+    """)
+
+    # Table 2: Job Details - Per model-day execution
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS job_details (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            job_id TEXT NOT NULL,
+            date TEXT NOT NULL,
+            model TEXT NOT NULL,
+            status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'completed', 'failed')),
+            started_at TEXT,
+            completed_at TEXT,
+            duration_seconds REAL,
+            error TEXT,
+            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
+        )
+    """)
+
+    # Table 3: Positions - Trading positions and P&L
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS positions (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            job_id TEXT NOT NULL,
+            date TEXT NOT NULL,
+            model TEXT NOT NULL,
+            action_id INTEGER NOT NULL,
+            action_type TEXT CHECK(action_type IN ('buy', 'sell', 'no_trade')),
+            symbol TEXT,
+            amount INTEGER,
+            price REAL,
+            cash REAL NOT NULL,
+            portfolio_value REAL NOT NULL,
+            daily_profit REAL,
+            daily_return_pct REAL,
+            cumulative_profit REAL,
+            cumulative_return_pct REAL,
+            created_at TEXT NOT NULL,
+            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
+        )
+    """)
+
+    # Table 4: Holdings - Portfolio holdings
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS holdings (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            position_id INTEGER NOT NULL,
+            symbol TEXT NOT NULL,
+            quantity INTEGER NOT NULL,
+            FOREIGN KEY (position_id) REFERENCES positions(id) ON DELETE CASCADE
+        )
+    """)
+
+    # Table 5: Reasoning Logs - AI decision logs (optional)
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS reasoning_logs (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            job_id TEXT NOT NULL,
+            date TEXT NOT NULL,
+            model TEXT NOT NULL,
+            step_number INTEGER NOT NULL,
+            timestamp TEXT NOT NULL,
+            role TEXT CHECK(role IN ('user', 'assistant', 'tool')),
+            content TEXT,
+            tool_name TEXT,
+            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
+        )
+    """)
+
+    # Table 6: Tool Usage - Tool usage statistics
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS tool_usage (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            job_id TEXT NOT NULL,
+            date TEXT NOT NULL,
+            model TEXT NOT NULL,
+            tool_name TEXT NOT NULL,
+            call_count INTEGER NOT NULL DEFAULT 1,
+            total_duration_seconds REAL,
+            FOREIGN KEY (job_id) REFERENCES jobs(job_id) ON DELETE CASCADE
+        )
+    """)
+
+    # Create indexes for performance
+    _create_indexes(cursor)
+
+    conn.commit()
+    conn.close()
+
+
+def _create_indexes(cursor: sqlite3.Cursor) -> None:
+    """Create database indexes for query performance."""
+
+    # Jobs table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC)
+    """)
+
+    # Job details table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_job_details_job_id ON job_details(job_id)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_job_details_status ON job_details(status)
+    """)
+    cursor.execute("""
+        CREATE UNIQUE INDEX IF NOT EXISTS idx_job_details_unique
+        ON job_details(job_id, date, model)
+    """)
+
+    # Positions table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_positions_job_id ON positions(job_id)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_positions_date ON positions(date)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_positions_model ON positions(model)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_positions_date_model ON positions(date, model)
+    """)
+    cursor.execute("""
+        CREATE UNIQUE INDEX IF NOT EXISTS idx_positions_unique
+        ON positions(job_id, date, model, action_id)
+    """)
+
+    # Holdings table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_holdings_position_id ON holdings(position_id)
+    """)
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_holdings_symbol ON holdings(symbol)
+    """)
+
+    # Reasoning logs table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_reasoning_logs_job_date_model
+        ON reasoning_logs(job_id, date, model)
+    """)
+
+    # Tool usage table indexes
+    cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_tool_usage_job_date_model
+        ON tool_usage(job_id, date, model)
+    """)
+
+
+def drop_all_tables(db_path: str = "data/jobs.db") -> None:
+    """
+    Drop all database tables. USE WITH CAUTION.
+
+    This is primarily for testing and development.
+
+    Args:
+        db_path: Path to SQLite database file
+    """
+    conn = get_db_connection(db_path)
+    cursor = conn.cursor()
+
+    tables = [
+        'tool_usage',
+        'reasoning_logs',
+        'holdings',
+        'positions',
+        'job_details',
+        'jobs'
+    ]
+
+    for table in tables:
+        cursor.execute(f"DROP TABLE IF EXISTS {table}")
+
+    conn.commit()
+    conn.close()
+
+
+def vacuum_database(db_path: str = "data/jobs.db") -> None:
+    """
+    Reclaim disk space after deletions.
+
+    Should be run periodically after cleanup operations.
+
+    Args:
+        db_path: Path to SQLite database file
+    """
+    conn = get_db_connection(db_path)
+    conn.execute("VACUUM")
+    conn.close()
+
+
+def get_database_stats(db_path: str = "data/jobs.db") -> dict:
+    """
+    Get database statistics for monitoring.
+
+    Returns:
+        Dictionary with table row counts and database size
+
+    Example:
+        {
+            "database_size_mb": 12.5,
+            "jobs": 150,
+            "job_details": 3000,
+            "positions": 15000,
+            "holdings": 45000,
+            "reasoning_logs": 300000,
+            "tool_usage": 12000
+        }
+    """
+    conn = get_db_connection(db_path)
+    cursor = conn.cursor()
+
+    stats = {}
+
+    # Get database file size
+    if os.path.exists(db_path):
+        size_bytes = os.path.getsize(db_path)
+        stats["database_size_mb"] = round(size_bytes / (1024 * 1024), 2)
+    else:
+        stats["database_size_mb"] = 0
+
+    # Get row counts for each table
+    tables = ['jobs', 'job_details', 'positions', 'holdings', 'reasoning_logs', 'tool_usage']
+
+    for table in tables:
+        cursor.execute(f"SELECT COUNT(*) FROM {table}")
+        stats[table] = cursor.fetchone()[0]
+
+    conn.close()
+
+    return stats