diff --git a/API_REFERENCE.md b/API_REFERENCE.md index 5bba390..4fe5eec 100644 --- a/API_REFERENCE.md +++ b/API_REFERENCE.md @@ -14,13 +14,19 @@ Complete reference for the AI-Trader-Server REST API service. Trigger a new simulation job for a specified date range and models. +**Supports three operational modes:** +1. **Explicit date range**: Provide both `start_date` and `end_date` +2. **Single date**: Set `start_date` = `end_date` +3. **Resume mode**: Set `start_date` to `null` to continue from each model's last completed date + **Request Body:** ```json { "start_date": "2025-01-16", "end_date": "2025-01-17", - "models": ["gpt-4", "claude-3.7-sonnet"] + "models": ["gpt-4", "claude-3.7-sonnet"], + "replace_existing": false } ``` @@ -28,9 +34,10 @@ Trigger a new simulation job for a specified date range and models. | Field | Type | Required | Description | |-------|------|----------|-------------| -| `start_date` | string | Yes | Start date in YYYY-MM-DD format | -| `end_date` | string | No | End date in YYYY-MM-DD format. If omitted, simulates single day (uses `start_date`) | -| `models` | array[string] | No | Model signatures to run. If omitted, uses all enabled models from server config | +| `start_date` | string \| null | No | Start date in YYYY-MM-DD format. If `null`, enables resume mode (each model continues from its last completed date). Defaults to `null`. | +| `end_date` | string | **Yes** | End date in YYYY-MM-DD format. **Required** - cannot be null or empty. | +| `models` | array[string] | No | Model signatures to run. If omitted, uses all enabled models from server config. | +| `replace_existing` | boolean | No | If `false` (default), skips already-completed model-days (idempotent). If `true`, re-runs all dates even if previously completed. | **Response (200 OK):** @@ -86,7 +93,8 @@ Trigger a new simulation job for a specified date range and models. 
- **Date format:** Must be YYYY-MM-DD - **Date validity:** Must be valid calendar dates -- **Date order:** `start_date` must be <= `end_date` +- **Date order:** `start_date` must be <= `end_date` (when `start_date` is not null) +- **end_date required:** Cannot be null or empty string - **Future dates:** Cannot simulate future dates (must be <= today) - **Date range limit:** Maximum 30 days (configurable via `MAX_SIMULATION_DAYS`) - **Model signatures:** Must match models defined in server configuration @@ -96,12 +104,21 @@ Trigger a new simulation job for a specified date range and models. 1. Validates date range and parameters 2. Determines which models to run (from request or server config) -3. Checks for missing price data in date range -4. Downloads missing data if `AUTO_DOWNLOAD_PRICE_DATA=true` (default) -5. Identifies trading dates with complete price data (all symbols available) -6. Creates job in database with status `pending` -7. Starts background worker thread -8. Returns immediately with job ID +3. **Resume mode** (if `start_date` is null): + - For each model, queries last completed simulation date + - If no previous data exists (cold start), uses `end_date` as single-day simulation + - Otherwise, resumes from day after last completed date + - Each model can have different resume start dates +4. **Idempotent mode** (if `replace_existing=false`, default): + - Queries database for already-completed model-day combinations in date range + - Skips completed model-days, only creates tasks for gaps + - Returns error if all requested dates are already completed +5. Checks for missing price data in date range +6. Downloads missing data if `AUTO_DOWNLOAD_PRICE_DATA=true` (default) +7. Identifies trading dates with complete price data (all symbols available) +8. Creates job in database with status `pending` (only for model-days that will actually run) +9. Starts background worker thread +10. 
Returns immediately with job ID **Examples:** @@ -111,6 +128,7 @@ curl -X POST http://localhost:8080/simulate/trigger \ -H "Content-Type: application/json" \ -d '{ "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }' ``` @@ -125,6 +143,41 @@ curl -X POST http://localhost:8080/simulate/trigger \ }' ``` +Resume from last completed date: +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": null, + "end_date": "2025-01-31", + "models": ["gpt-4"] + }' +``` + +Idempotent simulation (skip already-completed dates): +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-01-16", + "end_date": "2025-01-20", + "models": ["gpt-4"], + "replace_existing": false + }' +``` + +Re-run existing dates (force replace): +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-01-16", + "end_date": "2025-01-20", + "models": ["gpt-4"], + "replace_existing": true + }' +``` + --- ### GET /simulate/status/{job_id} @@ -484,6 +537,15 @@ JOB_ID=$(echo $RESPONSE | jq -r '.job_id') echo "Job ID: $JOB_ID" ``` +Or use resume mode: +```bash +RESPONSE=$(curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{"start_date": null, "end_date": "2025-01-31", "models": ["gpt-4"]}') + +JOB_ID=$(echo $RESPONSE | jq -r '.job_id') +``` + 2. **Poll for completion:** ```bash while true; do @@ -507,9 +569,24 @@ curl "http://localhost:8080/results?job_id=$JOB_ID" | jq '.' Use a scheduler (cron, Airflow, etc.) 
to trigger simulations: +**Option 1: Resume mode (recommended)** ```bash #!/bin/bash -# daily_simulation.sh +# daily_simulation.sh - Resume from last completed date + +# Calculate today's date +TODAY=$(date +%Y-%m-%d) + +# Trigger simulation in resume mode +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d "{\"start_date\": null, \"end_date\": \"$TODAY\", \"models\": [\"gpt-4\"]}" +``` + +**Option 2: Explicit yesterday's date** +```bash +#!/bin/bash +# daily_simulation.sh - Run specific date # Calculate yesterday's date DATE=$(date -d "yesterday" +%Y-%m-%d) @@ -517,7 +594,7 @@ DATE=$(date -d "yesterday" +%Y-%m-%d) # Trigger simulation curl -X POST http://localhost:8080/simulate/trigger \ -H "Content-Type: application/json" \ - -d "{\"start_date\": \"$DATE\", \"models\": [\"gpt-4\"]}" + -d "{\"start_date\": \"$DATE\", \"end_date\": \"$DATE\", \"models\": [\"gpt-4\"]}" ``` Add to crontab: @@ -676,11 +753,19 @@ class AITraderServerClient: def __init__(self, base_url="http://localhost:8080"): self.base_url = base_url - def trigger_simulation(self, start_date, end_date=None, models=None): - """Trigger a simulation job.""" - payload = {"start_date": start_date} - if end_date: - payload["end_date"] = end_date + def trigger_simulation(self, end_date, start_date=None, models=None, replace_existing=False): + """ + Trigger a simulation job. 
+ + Args: + end_date: End date (YYYY-MM-DD), required + start_date: Start date (YYYY-MM-DD) or None for resume mode + models: List of model signatures or None for all enabled models + replace_existing: If False, skip already-completed dates (idempotent) + """ + payload = {"end_date": end_date, "replace_existing": replace_existing} + if start_date is not None: + payload["start_date"] = start_date if models: payload["models"] = models @@ -719,9 +804,19 @@ class AITraderServerClient: response.raise_for_status() return response.json() -# Usage +# Usage examples client = AITraderServerClient() -job = client.trigger_simulation("2025-01-16", models=["gpt-4"]) + +# Single day simulation +job = client.trigger_simulation(end_date="2025-01-16", start_date="2025-01-16", models=["gpt-4"]) + +# Date range simulation +job = client.trigger_simulation(end_date="2025-01-20", start_date="2025-01-16") + +# Resume mode (continue from last completed) +job = client.trigger_simulation(end_date="2025-01-31", models=["gpt-4"]) + +# Wait for completion and get results result = client.wait_for_completion(job["job_id"]) results = client.get_results(job_id=job["job_id"]) ``` @@ -733,13 +828,23 @@ class AITraderServerClient { constructor(private baseUrl: string = "http://localhost:8080") {} async triggerSimulation( - startDate: string, - endDate?: string, - models?: string[] + endDate: string, + options: { + startDate?: string | null; + models?: string[]; + replaceExisting?: boolean; + } = {} ) { - const body: any = { start_date: startDate }; - if (endDate) body.end_date = endDate; - if (models) body.models = models; + const body: any = { + end_date: endDate, + replace_existing: options.replaceExisting ?? 
false + }; + if (options.startDate !== undefined) { + body.start_date = options.startDate; + } + if (options.models) { + body.models = options.models; + } const response = await fetch(`${this.baseUrl}/simulate/trigger`, { method: "POST", @@ -787,9 +892,27 @@ class AITraderServerClient { } } -// Usage +// Usage examples const client = new AITraderServerClient(); -const job = await client.triggerSimulation("2025-01-16", null, ["gpt-4"]); -const result = await client.waitForCompletion(job.job_id); -const results = await client.getResults({ jobId: job.job_id }); + +// Single day simulation +const job1 = await client.triggerSimulation("2025-01-16", { + startDate: "2025-01-16", + models: ["gpt-4"] +}); + +// Date range simulation +const job2 = await client.triggerSimulation("2025-01-20", { + startDate: "2025-01-16" +}); + +// Resume mode (continue from last completed) +const job3 = await client.triggerSimulation("2025-01-31", { + startDate: null, + models: ["gpt-4"] +}); + +// Wait for completion and get results +const result = await client.waitForCompletion(job1.job_id); +const results = await client.getResults({ jobId: job1.job_id }); ``` diff --git a/CHANGELOG_NEW_API.md b/CHANGELOG_NEW_API.md new file mode 100644 index 0000000..ad05684 --- /dev/null +++ b/CHANGELOG_NEW_API.md @@ -0,0 +1,265 @@ +# API Schema Update - Resume Mode & Idempotent Behavior + +## Summary + +Updated the `/simulate/trigger` endpoint to support three new use cases: +1. **Resume mode**: Continue simulations from last completed date per model +2. **Idempotent behavior**: Skip already-completed dates by default +3. 
**Explicit date ranges**: Clearer API contract with required `end_date` + +## Breaking Changes + +### Request Schema + +**Before:** +```json +{ + "start_date": "2025-10-01", // Required + "end_date": "2025-10-02", // Optional (defaulted to start_date) + "models": ["gpt-5"] // Optional +} +``` + +**After:** +```json +{ + "start_date": "2025-10-01", // Optional (null for resume mode) + "end_date": "2025-10-02", // REQUIRED (cannot be null/empty) + "models": ["gpt-5"], // Optional + "replace_existing": false // NEW: Optional (default: false) +} +``` + +### Key Changes + +1. **`end_date` is now REQUIRED** + - Cannot be `null` or empty string + - Must always be provided + - For single-day simulation, set `start_date` == `end_date` + +2. **`start_date` is now OPTIONAL** + - Can be `null` or omitted to enable resume mode + - When `null`, each model resumes from its last completed date + - If no data exists (cold start), uses `end_date` as single-day simulation + +3. **NEW `replace_existing` field** + - `false` (default): Skip already-completed model-days (idempotent) + - `true`: Re-run all dates even if previously completed + +## Use Cases + +### 1. Explicit Date Range +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-10-01", + "end_date": "2025-10-31", + "models": ["gpt-5"] + }' +``` + +### 2. Single Date +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-10-15", + "end_date": "2025-10-15", + "models": ["gpt-5"] + }' +``` + +### 3. 
Resume Mode (NEW) +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": null, + "end_date": "2025-10-31", + "models": ["gpt-5"] + }' +``` + +**Behavior:** +- Model "gpt-5" last completed: `2025-10-15` +- Will simulate: `2025-10-16` through `2025-10-31` +- If no data exists: Will simulate only `2025-10-31` + +### 4. Idempotent Simulation (NEW) +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-10-01", + "end_date": "2025-10-31", + "models": ["gpt-5"], + "replace_existing": false + }' +``` + +**Behavior:** +- Checks database for already-completed dates +- Only simulates dates that haven't been completed yet +- Returns error if all dates already completed + +### 5. Force Replace +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": "2025-10-01", + "end_date": "2025-10-31", + "models": ["gpt-5"], + "replace_existing": true + }' +``` + +**Behavior:** +- Re-runs all dates regardless of completion status + +## Implementation Details + +### Files Modified + +1. **`api/main.py`** + - Updated `SimulateTriggerRequest` Pydantic model + - Added validators for `end_date` (required) + - Added validators for `start_date` (optional, can be null) + - Added resume logic per model + - Added idempotent filtering logic + - Fixed bug with `start_date=None` in price data checks + +2. **`api/job_manager.py`** + - Added `get_last_completed_date_for_model(model)` method + - Added `get_completed_model_dates(models, start_date, end_date)` method + - Updated `create_job()` to accept `model_day_filter` parameter + +3. **`tests/integration/test_api_endpoints.py`** + - Updated all tests to use new schema + - Added tests for resume mode + - Added tests for idempotent behavior + - Added tests for validation rules + +4. 
**Documentation Updated** + - `API_REFERENCE.md` - Complete API documentation with examples + - `QUICK_START.md` - Updated getting started examples + - `docs/user-guide/using-the-api.md` - Updated user guide + - Client library examples (Python, TypeScript) + +### Database Schema + +No changes to database schema. New functionality uses existing tables: +- `job_details` table tracks completion status per model-day +- Unique index on `(job_id, date, model)` ensures no duplicates + +### Per-Model Independence + +Each model maintains its own completion state: +``` +Model A: last_completed_date = 2025-10-15 +Model B: last_completed_date = 2025-10-10 + +Request: start_date=null, end_date=2025-10-31 + +Result: +- Model A simulates: 2025-10-16 through 2025-10-31 (16 days) +- Model B simulates: 2025-10-11 through 2025-10-31 (21 days) +``` + +## Migration Guide + +### For API Clients + +**Old Code:** +```python +# Single day (old) +client.trigger_simulation(start_date="2025-10-15") +``` + +**New Code:** +```python +# Single day (new) - MUST provide end_date +client.trigger_simulation(start_date="2025-10-15", end_date="2025-10-15") + +# Or use resume mode +client.trigger_simulation(start_date=None, end_date="2025-10-31") +``` + +### Validation Changes + +**Will Now Fail:** +```json +{ + "start_date": "2025-10-01", + "end_date": "" // ❌ Empty string rejected +} +``` + +```json +{ + "start_date": "2025-10-01", + "end_date": null // ❌ Null rejected +} +``` + +```json +{ + "start_date": "2025-10-01" // ❌ Missing end_date +} +``` + +**Will Work:** +```json +{ + "end_date": "2025-10-31" // ✓ start_date omitted = resume mode +} +``` + +```json +{ + "start_date": null, + "end_date": "2025-10-31" // ✓ Explicit null = resume mode +} +``` + +## Benefits + +1. **Daily Automation**: Resume mode perfect for cron jobs + - No need to calculate "yesterday's date" + - Just provide today as end_date + +2. **Idempotent by Default**: Safe to re-run + - Accidentally trigger same date? 
No problem, it's skipped + - Explicit `replace_existing=true` when you want to re-run + +3. **Per-Model Independence**: Flexible deployment + - Can add new models without re-running old ones + - Models can progress at different rates + +4. **Clear API Contract**: No ambiguity + - `end_date` always required + - `start_date=null` clearly means "resume" + - Default behavior is safe (idempotent) + +## Backward Compatibility + +⚠️ **This is a BREAKING CHANGE** for clients that: +- Rely on `end_date` defaulting to `start_date` +- Don't explicitly provide `end_date` + +**Migration:** Update all API calls to explicitly provide `end_date`. + +## Testing + +Run integration tests: +```bash +pytest tests/integration/test_api_endpoints.py -v +``` + +All tests updated to cover: +- Single-day simulation +- Date ranges +- Resume mode (cold start and with existing data) +- Idempotent behavior +- Validation rules diff --git a/QUICK_START.md b/QUICK_START.md index acd6030..300f32e 100644 --- a/QUICK_START.md +++ b/QUICK_START.md @@ -108,6 +108,7 @@ curl -X POST http://localhost:8080/simulate/trigger \ -H "Content-Type: application/json" \ -d '{ "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }' ``` @@ -119,12 +120,14 @@ curl -X POST http://localhost:8080/simulate/trigger \ "job_id": "550e8400-e29b-41d4-a716-446655440000", "status": "pending", "total_model_days": 1, - "message": "Simulation job created with 1 trading dates" + "message": "Simulation job created with 1 model-day tasks" } ``` **Save the `job_id`** - you'll need it to check status. +**Note:** `end_date` is always required. For a single day, set `start_date` and `end_date` to the same value. To simulate a range, use different dates (e.g., `"start_date": "2025-01-16", "end_date": "2025-01-20"`). To resume from each model's last completed date, set `start_date` to `null`. 
+ --- ## Step 6: Monitor Progress @@ -234,12 +237,32 @@ curl -X POST http://localhost:8080/simulate/trigger \ -H "Content-Type: application/json" \ -d '{ "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4", "claude-3.7-sonnet"] }' ``` **Note:** Models must be defined and enabled in `configs/default_config.json`. +### Resume from Last Completed Date + +Continue simulations from where you left off (useful for daily automation): + +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{ + "start_date": null, + "end_date": "2025-01-31", + "models": ["gpt-4"] + }' +``` + +This will: +- Check the last completed date for each model +- Resume from the next day after the last completed date +- If no previous data exists, run only the `end_date` as a single day + ### Query Specific Results ```bash diff --git a/api/job_manager.py b/api/job_manager.py index 03d812a..8dee0fd 100644 --- a/api/job_manager.py +++ b/api/job_manager.py @@ -54,7 +54,8 @@ class JobManager: self, config_path: str, date_range: List[str], - models: List[str] + models: List[str], + model_day_filter: Optional[List[tuple]] = None ) -> str: """ Create new simulation job. @@ -63,6 +64,8 @@ class JobManager: config_path: Path to configuration file date_range: List of dates to simulate (YYYY-MM-DD) models: List of model signatures to execute + model_day_filter: Optional list of (model, date) tuples to limit job_details. + If None, creates job_details for all model-date combinations. 
Returns: job_id: UUID of created job @@ -95,9 +98,10 @@ class JobManager: created_at )) - # Create job_details for each model-day combination - for date in date_range: - for model in models: + # Create job_details based on filter + if model_day_filter is not None: + # Only create job_details for specified model-day pairs + for model, date in model_day_filter: cursor.execute(""" INSERT INTO job_details ( job_id, date, model, status @@ -105,8 +109,21 @@ class JobManager: VALUES (?, ?, ?, ?) """, (job_id, date, model, "pending")) + logger.info(f"Created job {job_id} with {len(model_day_filter)} model-day tasks (filtered)") + else: + # Create job_details for all model-day combinations + for date in date_range: + for model in models: + cursor.execute(""" + INSERT INTO job_details ( + job_id, date, model, status + ) + VALUES (?, ?, ?, ?) + """, (job_id, date, model, "pending")) + + logger.info(f"Created job {job_id} with {len(date_range)} dates and {len(models)} models") + conn.commit() - logger.info(f"Created job {job_id} with {len(date_range)} dates and {len(models)} models") return job_id @@ -585,6 +602,67 @@ class JobManager: finally: conn.close() + def get_last_completed_date_for_model(self, model: str) -> Optional[str]: + """ + Get last completed simulation date for a specific model. + + Args: + model: Model signature + + Returns: + Last completed date (YYYY-MM-DD) or None if no data exists + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + cursor.execute(""" + SELECT date + FROM job_details + WHERE model = ? AND status = 'completed' + ORDER BY date DESC + LIMIT 1 + """, (model,)) + + row = cursor.fetchone() + return row[0] if row else None + + finally: + conn.close() + + def get_completed_model_dates(self, models: List[str], start_date: str, end_date: str) -> Dict[str, List[str]]: + """ + Get all completed dates for each model within a date range. 
+ + Args: + models: List of model signatures + start_date: Start date (YYYY-MM-DD) + end_date: End date (YYYY-MM-DD) + + Returns: + Dict mapping model signature to list of completed dates + """ + conn = get_db_connection(self.db_path) + cursor = conn.cursor() + + try: + result = {model: [] for model in models} + + for model in models: + cursor.execute(""" + SELECT DISTINCT date + FROM job_details + WHERE model = ? AND status = 'completed' AND date >= ? AND date <= ? + ORDER BY date + """, (model, start_date, end_date)) + + result[model] = [row[0] for row in cursor.fetchall()] + + return result + + finally: + conn.close() + def cleanup_old_jobs(self, days: int = 30) -> Dict[str, int]: """ Delete jobs older than threshold. diff --git a/api/main.py b/api/main.py index 6d05be8..5c466cf 100644 --- a/api/main.py +++ b/api/main.py @@ -33,28 +33,36 @@ logger = logging.getLogger(__name__) # Pydantic models for request/response validation class SimulateTriggerRequest(BaseModel): """Request body for POST /simulate/trigger.""" - start_date: str = Field(..., description="Start date for simulation (YYYY-MM-DD)") - end_date: Optional[str] = Field(None, description="End date for simulation (YYYY-MM-DD). If not provided, simulates single day.") + start_date: Optional[str] = Field(None, description="Start date for simulation (YYYY-MM-DD). If null/omitted, resumes from last completed date per model.") + end_date: str = Field(..., description="End date for simulation (YYYY-MM-DD). Required.") models: Optional[List[str]] = Field( None, description="Optional: List of model signatures to simulate. If not provided, uses enabled models from config." ) + replace_existing: bool = Field( + False, + description="If true, replaces existing simulation data. If false (default), skips dates that already have data (idempotent)." 
+ ) @field_validator("start_date", "end_date") @classmethod def validate_date_format(cls, v): """Validate date format.""" - if v is None: - return v + if v is None or v == "": + return None try: datetime.strptime(v, "%Y-%m-%d") except ValueError: raise ValueError(f"Invalid date format: {v}. Expected YYYY-MM-DD") return v - def get_end_date(self) -> str: - """Get end date, defaulting to start_date if not provided.""" - return self.end_date or self.start_date + @field_validator("end_date") + @classmethod + def validate_end_date_required(cls, v): + """Ensure end_date is not null or empty.""" + if v is None or v == "": + raise ValueError("end_date is required and cannot be null or empty") + return v class SimulateTriggerResponse(BaseModel): @@ -136,6 +144,12 @@ def create_app( Validates date range, downloads missing price data if needed, and creates job with available trading dates. + Supports: + - Single date: start_date == end_date + - Date range: start_date < end_date + - Resume: start_date is null (each model resumes from its last completed date) + - Idempotent: replace_existing=false skips already completed model-days + Raises: HTTPException 400: Validation errors, running job, or invalid dates HTTPException 503: Price data download failed @@ -151,12 +165,7 @@ def create_app( detail=f"Server configuration file not found: {config_path}" ) - # Get end date (defaults to start_date for single day) - end_date = request.get_end_date() - - # Validate date range - max_days = get_max_simulation_days() - validate_date_range(request.start_date, end_date, max_days=max_days) + end_date = request.end_date # Determine which models to run import json @@ -180,13 +189,44 @@ def create_app( detail="No enabled models found in config. Either enable models in config or specify them in request." 
) + job_manager = JobManager(db_path=app.state.db_path) + + # Handle resume logic (start_date is null) + if request.start_date is None: + # Resume mode: determine start date per model + model_start_dates = {} + + for model in models_to_run: + last_date = job_manager.get_last_completed_date_for_model(model) + + if last_date is None: + # Cold start: use end_date as single-day simulation + model_start_dates[model] = end_date + else: + # Resume from next day after last completed + last_dt = datetime.strptime(last_date, "%Y-%m-%d") + next_dt = last_dt + timedelta(days=1) + model_start_dates[model] = next_dt.strftime("%Y-%m-%d") + + # For validation purposes, use earliest start date + earliest_start = min(model_start_dates.values()) + start_date = earliest_start + else: + # Explicit start date provided + start_date = request.start_date + model_start_dates = {model: start_date for model in models_to_run} + + # Validate date range + max_days = get_max_simulation_days() + validate_date_range(start_date, end_date, max_days=max_days) + # Check price data and download if needed auto_download = os.getenv("AUTO_DOWNLOAD_PRICE_DATA", "true").lower() == "true" price_manager = PriceDataManager(db_path=app.state.db_path) - # Check what's missing + # Check what's missing (use computed start_date, not request.start_date which may be None) missing_coverage = price_manager.get_missing_coverage( - request.start_date, + start_date, end_date ) @@ -203,7 +243,7 @@ def create_app( logger.info(f"Downloading missing price data for {len(missing_coverage)} symbols") - requested_dates = set(expand_date_range(request.start_date, end_date)) + requested_dates = set(expand_date_range(start_date, end_date)) download_result = price_manager.download_missing_data_prioritized( missing_coverage, @@ -229,7 +269,7 @@ def create_app( # Get available trading dates (after potential download) available_dates = price_manager.get_available_trading_dates( - request.start_date, + start_date, end_date ) @@ -237,11 
+277,54 @@ def create_app( raise HTTPException( status_code=400, detail=f"No trading dates with complete price data in range " - f"{request.start_date} to {end_date}. " + f"{start_date} to {end_date}. " f"All symbols must have data for a date to be tradeable." ) - job_manager = JobManager(db_path=app.state.db_path) + # Handle idempotent behavior (skip already completed model-days) + if not request.replace_existing: + # Get existing completed dates per model + completed_dates = job_manager.get_completed_model_dates( + models_to_run, + start_date, + end_date + ) + + # Build list of model-day tuples to simulate + model_day_tasks = [] + for model in models_to_run: + # Filter dates for this model + model_start = model_start_dates[model] + + for date in available_dates: + # Skip if before model's start date + if date < model_start: + continue + + # Skip if already completed (idempotent) + if date in completed_dates.get(model, []): + continue + + model_day_tasks.append((model, date)) + + if not model_day_tasks: + raise HTTPException( + status_code=400, + detail="No new model-days to simulate. All requested dates are already completed. " + "Use replace_existing=true to re-run." + ) + + # Extract unique dates that will actually be run + dates_to_run = sorted(list(set([date for _, date in model_day_tasks]))) + else: + # Replace mode: run all model-date combinations + dates_to_run = available_dates + model_day_tasks = [ + (model, date) + for model in models_to_run + for date in available_dates + if date >= model_start_dates[model] + ] # Check if can start new job if not job_manager.can_start_new_job(): @@ -250,11 +333,13 @@ def create_app( detail="Another simulation job is already running or pending. Please wait for it to complete." 
) - # Create job with available dates + # Create job with dates that will be run + # Pass model_day_tasks to only create job_details for tasks that will actually run job_id = job_manager.create_job( config_path=config_path, - date_range=available_dates, - models=models_to_run + date_range=dates_to_run, + models=models_to_run, + model_day_filter=model_day_tasks ) # Start worker in background thread (only if not in test mode) @@ -266,12 +351,26 @@ def create_app( thread = threading.Thread(target=run_worker, daemon=True) thread.start() - logger.info(f"Triggered simulation job {job_id} with {len(available_dates)} dates") + logger.info(f"Triggered simulation job {job_id} with {len(model_day_tasks)} model-day tasks") # Build response message - message = f"Simulation job created with {len(available_dates)} trading dates" + total_model_days = len(model_day_tasks) + message_parts = [f"Simulation job created with {total_model_days} model-day tasks"] + + if request.start_date is None: + message_parts.append("(resume mode)") + + if not request.replace_existing: + # Calculate how many were skipped + total_possible = len(models_to_run) * len(available_dates) + skipped = total_possible - total_model_days + if skipped > 0: + message_parts.append(f"({skipped} already completed, skipped)") + if download_info and download_info["rate_limited"]: - message += " (rate limit reached - partial data)" + message_parts.append("(rate limit reached - partial data)") + + message = " ".join(message_parts) # Get deployment mode info deployment_info = get_deployment_mode_dict() @@ -279,7 +378,7 @@ def create_app( response = SimulateTriggerResponse( job_id=job_id, status="pending", - total_model_days=len(available_dates) * len(models_to_run), + total_model_days=total_model_days, message=message, **deployment_info ) diff --git a/docs/user-guide/using-the-api.md b/docs/user-guide/using-the-api.md index b9d5010..f3e4d2f 100644 --- a/docs/user-guide/using-the-api.md +++ 
b/docs/user-guide/using-the-api.md @@ -49,11 +49,12 @@ curl "http://localhost:8080/results?job_id=$JOB_ID" | jq '.' ### Single-Day Simulation -Omit `end_date` to simulate just one day: +Set `start_date` and `end_date` to the same value: ```bash curl -X POST http://localhost:8080/simulate/trigger \ - -d '{"start_date": "2025-01-16", "models": ["gpt-4"]}' + -H "Content-Type: application/json" \ + -d '{"start_date": "2025-01-16", "end_date": "2025-01-16", "models": ["gpt-4"]}' ``` ### All Enabled Models @@ -62,9 +63,22 @@ Omit `models` to run all enabled models from config: ```bash curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ -d '{"start_date": "2025-01-16", "end_date": "2025-01-20"}' ``` +### Resume from Last Completed + +Use `"start_date": null` to continue from where you left off: + +```bash +curl -X POST http://localhost:8080/simulate/trigger \ + -H "Content-Type: application/json" \ + -d '{"start_date": null, "end_date": "2025-01-31", "models": ["gpt-4"]}' +``` + +Each model will resume from its own last completed date. If no data exists, runs only `end_date` as a single day. 
+ ### Filter Results ```bash diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index 19c5e30..a925eab 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -50,8 +50,8 @@ class TestSimulateTriggerEndpoint: def test_trigger_creates_job(self, api_client): """Should create job and return job_id.""" response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16", "2025-01-17"], + "start_date": "2025-01-16", + "end_date": "2025-01-17", "models": ["gpt-4"] }) @@ -61,56 +61,107 @@ class TestSimulateTriggerEndpoint: assert data["status"] == "pending" assert data["total_model_days"] == 2 - def test_trigger_validates_config_path(self, api_client): - """Should reject nonexistent config path.""" + def test_trigger_single_date(self, api_client): + """Should create job for single date.""" response = api_client.post("/simulate/trigger", json={ - "config_path": "/nonexistent/config.json", - "date_range": ["2025-01-16"], + "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }) - assert response.status_code == 400 - assert "does not exist" in response.json()["detail"].lower() + assert response.status_code == 200 + data = response.json() + assert data["total_model_days"] == 1 - def test_trigger_validates_date_range(self, api_client): - """Should reject invalid date range.""" + def test_trigger_resume_mode_cold_start(self, api_client): + """Should use end_date as single day when no existing data (cold start).""" response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": [], # Empty date range + "start_date": None, + "end_date": "2025-01-16", "models": ["gpt-4"] }) - assert response.status_code == 422 # Pydantic validation error + assert response.status_code == 200 + data = response.json() + assert data["total_model_days"] == 1 + assert "resume 
mode" in data["message"] + + def test_trigger_requires_end_date(self, api_client): + """Should reject request with missing end_date.""" + response = api_client.post("/simulate/trigger", json={ + "start_date": "2025-01-16", + "end_date": "", + "models": ["gpt-4"] + }) + + assert response.status_code == 422 + assert "end_date" in str(response.json()["detail"]).lower() + + def test_trigger_rejects_null_end_date(self, api_client): + """Should reject request with null end_date.""" + response = api_client.post("/simulate/trigger", json={ + "start_date": "2025-01-16", + "end_date": None, + "models": ["gpt-4"] + }) + + assert response.status_code == 422 def test_trigger_validates_models(self, api_client): - """Should reject empty model list.""" + """Should use enabled models from config when models not specified.""" response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16"], - "models": [] # Empty models + "start_date": "2025-01-16", + "end_date": "2025-01-16" + # models not specified - should use enabled models from config }) - assert response.status_code == 422 # Pydantic validation error + assert response.status_code == 200 + data = response.json() + assert data["total_model_days"] >= 1 def test_trigger_enforces_single_job_limit(self, api_client): """Should reject trigger when job already running.""" # Create first job api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16"], + "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }) # Try to create second job response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-17"], + "start_date": "2025-01-17", + "end_date": "2025-01-17", "models": ["gpt-4"] }) assert response.status_code == 400 assert "already running" in response.json()["detail"].lower() + def test_trigger_idempotent_behavior(self, 
api_client): + """Should skip already completed dates when replace_existing=false.""" + # This test would need a completed job first + # For now, just verify the parameter is accepted + response = api_client.post("/simulate/trigger", json={ + "start_date": "2025-01-16", + "end_date": "2025-01-16", + "models": ["gpt-4"], + "replace_existing": False + }) + + assert response.status_code == 200 + + def test_trigger_replace_existing_flag(self, api_client): + """Should accept replace_existing flag.""" + response = api_client.post("/simulate/trigger", json={ + "start_date": "2025-01-16", + "end_date": "2025-01-16", + "models": ["gpt-4"], + "replace_existing": True + }) + + assert response.status_code == 200 + @pytest.mark.integration class TestSimulateStatusEndpoint: @@ -120,8 +171,8 @@ class TestSimulateStatusEndpoint: """Should return job status and progress.""" # Create job create_response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16"], + "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }) job_id = create_response.json()["job_id"] @@ -147,8 +198,8 @@ class TestSimulateStatusEndpoint: """Should include model-day execution details.""" # Create job create_response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16", "2025-01-17"], + "start_date": "2025-01-16", + "end_date": "2025-01-17", "models": ["gpt-4"] }) job_id = create_response.json()["job_id"] @@ -182,8 +233,8 @@ class TestResultsEndpoint: """Should filter results by job_id.""" # Create job create_response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path, - "date_range": ["2025-01-16"], + "start_date": "2025-01-16", + "end_date": "2025-01-16", "models": ["gpt-4"] }) job_id = create_response.json()["job_id"] @@ -279,8 +330,8 @@ class TestErrorHandling: def test_missing_required_fields_returns_422(self, 
api_client): """Should validate required fields.""" response = api_client.post("/simulate/trigger", json={ - "config_path": api_client.test_config_path - # Missing date_range and models + "start_date": "2025-01-16" + # Missing end_date }) assert response.status_code == 422