diff --git a/.env.example b/.env.example index 9b1af33..59a6d33 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,16 @@ API_PORT=8080 # Agent Configuration AGENT_MAX_STEP=30 +# Simulation Configuration +# Maximum number of days allowed in a single simulation range +# Prevents accidentally requesting very large date ranges +MAX_SIMULATION_DAYS=30 + +# Price Data Configuration +# Automatically download missing price data from Alpha Vantage when needed +# If disabled, all price data must be pre-populated in the database +AUTO_DOWNLOAD_PRICE_DATA=true + # Data Volume Configuration # Base directory for all persistent data (will contain data/, logs/, configs/ subdirectories) # Use relative paths (./volumes) or absolute paths (/home/user/ai-trader-volumes) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77d0513..a007345 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,27 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -### Changed -- **Simplified API Interface** - Config path is now a server-side detail - - Removed `config_path` parameter from POST /simulate/trigger - - Server uses internal default config (configs/default_config.json) - - Simplifies API calls - only need to specify date_range -- **Model Selection** - `enabled` field in config now controls which models run - - API `models` parameter is now optional - - If not provided, uses models where `enabled: true` in config - - If provided, explicitly overrides config (for manual testing) - - Prevents accidental execution of all models - -### Removed -- **Web UI Port** - Removed unused web dashboard port configuration - - Removed port 8888 from docker-compose.yml (not implemented) - - Removed WEB_HTTP_PORT from .env.example - - Removed port 8888 from Dockerfile EXPOSE - - Web UI static files exist in docs/ but are not served in API mode - - Only port 8080 (REST API) is now exposed - ## [0.3.0] - 2025-10-31 +### Added - Price Data Management & On-Demand 
Downloads +- **SQLite Price Data Storage** - Replaced JSONL files with relational database + - `price_data` table for OHLCV data (replaces merged.jsonl) + - `price_data_coverage` table for tracking downloaded date ranges + - `simulation_runs` table for soft-delete position tracking + - Comprehensive indexes for query performance +- **On-Demand Price Data Downloads** - Automatic gap filling via Alpha Vantage + - Priority-based download strategy (maximize date completion) + - Graceful rate limit handling (no pre-configured limits needed) + - Smart coverage gap detection + - Configurable via `AUTO_DOWNLOAD_PRICE_DATA` (default: true) +- **Date Range API** - Simplified date specification + - Single date: `{"start_date": "2025-01-20"}` + - Date range: `{"start_date": "2025-01-20", "end_date": "2025-01-24"}` + - Automatic validation (chronological order, max range, not future) + - Configurable max days via `MAX_SIMULATION_DAYS` (default: 30) +- **Migration Tooling** - Script to import existing merged.jsonl data + - `scripts/migrate_price_data.py` for one-time data migration + - Automatic coverage tracking during migration + ### Added - API Service Transformation - **REST API Service** - Complete FastAPI implementation for external orchestration - `POST /simulate/trigger` - Trigger simulation jobs with config, date range, and models @@ -74,15 +75,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **Architecture** - Transformed from batch-only to API-first service with database persistence - **Data Storage** - Migrated from JSONL files to SQLite relational database + - Price data now stored in `price_data` table instead of `merged.jsonl` + - Tools/price_tools.py updated to query database + - Position data remains in database (already migrated in earlier versions) - **Deployment** - Simplified to single API-only Docker service +- **API Request Format** - Date range specification changed + - Old: `{"date_range": 
["2025-01-20", "2025-01-21", ...]}` + - New: `{"start_date": "2025-01-20", "end_date": "2025-01-24"}` + - `end_date` is optional (defaults to `start_date` for single day simulation) + - Server automatically expands range and validates trading days - **Configuration** - Simplified environment variable configuration - - Added configurable API_PORT for host port mapping (default: 8080, customizable for port conflicts) - - Removed `RUNTIME_ENV_PATH` (API dynamically manages runtime configs via RuntimeConfigManager) - - Removed MCP service port configuration (MATH_HTTP_PORT, SEARCH_HTTP_PORT, TRADE_HTTP_PORT, GETPRICE_HTTP_PORT) + - **Added:** `AUTO_DOWNLOAD_PRICE_DATA` (default: true) - Enable on-demand downloads + - **Added:** `MAX_SIMULATION_DAYS` (default: 30) - Maximum date range size + - **Added:** `API_PORT` for host port mapping (default: 8080, customizable for port conflicts) + - **Removed:** `RUNTIME_ENV_PATH` (API dynamically manages runtime configs) + - **Removed:** MCP service ports (MATH_HTTP_PORT, SEARCH_HTTP_PORT, TRADE_HTTP_PORT, GETPRICE_HTTP_PORT) + - **Removed:** `WEB_HTTP_PORT` (web UI not implemented) - MCP services use fixed internal ports (8000-8003) and are no longer exposed to host - - Container always uses port 8080 internally for API (hardcoded in entrypoint.sh) - - Only API port (8080) and web dashboard (8888) are exposed to host + - Container always uses port 8080 internally for API + - Only API port (8080) is exposed to host - Reduces configuration complexity and attack surface +- **Model Selection** - `enabled` field in config now controls which models run + - API `models` parameter is now optional + - If not provided, uses models where `enabled: true` in config + - If provided, explicitly overrides config (for manual testing) + - Prevents accidental execution of all models +- **API Interface** - Config path is now server-side detail + - Removed `config_path` parameter from POST /simulate/trigger + - Server uses internal default config 
(configs/default_config.json) + - Simplifies API calls - **Requirements** - Added fastapi>=0.120.0, uvicorn[standard]>=0.27.0, pydantic>=2.0.0 - **Docker Compose** - Single service (ai-trader) instead of dual-mode - **Dockerfile** - Added system dependencies (curl, procps) and port 8080 exposure @@ -120,6 +141,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Simplifies deployment and eliminates dual-mode complexity - Focus on API-first architecture for external orchestration - Migration: Use POST /simulate/trigger endpoint instead of batch execution +- **API Request Format Changed** - Date specification now uses start_date/end_date + - Old format: `{"date_range": ["2025-01-20", "2025-01-21"], "models": [...]}` + - New format: `{"start_date": "2025-01-20", "end_date": "2025-01-21"}` + - Models parameter is optional (uses enabled models from config) + - Config_path parameter removed (server-side detail) +- **Data Storage Format Changed** - Price data moved from JSONL to SQLite + - Run `python scripts/migrate_price_data.py` to migrate existing data + - `merged.jsonl` no longer used (replaced by `price_data` table) + - Automatic on-demand downloads eliminate need for manual data fetching +- **Configuration Variables Changed** + - Added: `AUTO_DOWNLOAD_PRICE_DATA`, `MAX_SIMULATION_DAYS` + - Removed: `RUNTIME_ENV_PATH`, MCP port configs, `WEB_HTTP_PORT` ## [0.2.0] - 2025-10-31 diff --git a/tools/price_tools.py b/tools/price_tools.py index f70ddda..4aeec81 100644 --- a/tools/price_tools.py +++ b/tools/price_tools.py @@ -12,6 +12,7 @@ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if project_root not in sys.path: sys.path.insert(0, project_root) from tools.general_tools import get_config_value +from api.database import get_db_connection all_nasdaq_100_symbols = [ "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO", "TSLA", @@ -47,143 +48,95 @@ def get_yesterday_date(today_date: str) -> str: 
def _to_optional_float(value):
    """Convert a database cell to ``float``, passing ``None`` (SQL NULL) through."""
    return float(value) if value is not None else None


def _fetch_price_rows(date: str, symbols: List[str], columns: Tuple[str, ...], db_path: str) -> list:
    """Return ``(symbol, *columns)`` rows from ``price_data`` for one date.

    Args:
        date: Date string matched against the ``date`` column.
        symbols: Ticker symbols matched against the ``symbol`` column.
        columns: Column names to select after ``symbol``. These are
            interpolated into the SQL text, so they must be trusted,
            hard-coded identifiers -- never caller-supplied input.
        db_path: Path handed to ``get_db_connection``.

    Returns:
        All matching rows as returned by ``cursor.fetchall()``.

    Raises:
        Whatever ``get_db_connection`` or the cursor raises; callers decide
        how to degrade.
    """
    # One "?" per symbol keeps the symbol values fully parameterised.
    placeholders = ','.join('?' * len(symbols))
    query = f"""
        SELECT symbol, {', '.join(columns)}
        FROM price_data
        WHERE date = ? AND symbol IN ({placeholders})
    """
    params = [date] + list(symbols)

    conn = get_db_connection(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        return cursor.fetchall()
    finally:
        # Close on the error path too; previously a failing query leaked
        # the connection because close() was only reached on success.
        conn.close()


def get_open_prices(today_date: str, symbols: List[str], merged_path: Optional[str] = None, db_path: str = "data/jobs.db") -> Dict[str, Optional[float]]:
    """Read open prices for the given date and symbols from the ``price_data`` table.

    Args:
        today_date: Date string in ``YYYY-MM-DD`` format.
        symbols: Ticker symbols to look up.
        merged_path: Deprecated and ignored; kept only so existing callers
            that still pass a ``merged.jsonl`` path keep working.
        db_path: Database path, defaults to ``data/jobs.db``.

    Returns:
        A dict keyed by ``"<symbol>_price"``. A symbol with no row for the
        date is absent from the dict (not mapped to ``None``); the value is
        ``None`` only when the stored ``open`` is NULL. If the query fails,
        the error is printed and whatever was collected so far (normally an
        empty dict) is returned.
    """
    results: Dict[str, Optional[float]] = {}

    # Nothing to look up: skip the database round-trip entirely.
    if not symbols:
        return results

    try:
        for row in _fetch_price_rows(today_date, symbols, ("open",), db_path):
            results[f'{row[0]}_price'] = _to_optional_float(row[1])
    except Exception as e:
        # Best-effort boundary: report the error but return the (possibly
        # empty) results so callers keep their existing behaviour.
        print(f"Error querying price data: {e}")

    return results


def get_yesterday_open_and_close_price(today_date: str, symbols: List[str], merged_path: Optional[str] = None, db_path: str = "data/jobs.db") -> Tuple[Dict[str, Optional[float]], Dict[str, Optional[float]]]:
    """Read the previous day's buy (open) and sell (close) prices from ``price_data``.

    The date queried is ``get_yesterday_date(today_date)``.

    NOTE(review): the earlier JSONL-based implementation fell back to the
    most recent of up to five prior trading days when that date had no bar.
    This database version queries that single date only -- confirm the
    fallback was dropped on purpose.

    Args:
        today_date: Date string in ``YYYY-MM-DD`` format, representing today.
        symbols: Ticker symbols to look up.
        merged_path: Deprecated and ignored; kept for backward compatibility.
        db_path: Database path, defaults to ``data/jobs.db``.

    Returns:
        ``(buy_prices, sell_prices)``, both keyed by ``"<symbol>_price"``.
        A symbol with no row for the date is absent from both dicts; a value
        is ``None`` only when the stored column is NULL. If the query fails,
        the error is printed and whatever was collected so far is returned.
    """
    buy_results: Dict[str, Optional[float]] = {}
    sell_results: Dict[str, Optional[float]] = {}

    # Nothing to look up: skip date arithmetic and the database round-trip.
    if not symbols:
        return buy_results, sell_results

    yesterday_date = get_yesterday_date(today_date)

    try:
        for row in _fetch_price_rows(yesterday_date, symbols, ("open", "close"), db_path):
            key = f'{row[0]}_price'
            buy_results[key] = _to_optional_float(row[1])   # buy price = open
            sell_results[key] = _to_optional_float(row[2])  # sell price = close
    except Exception as e:
        # Best-effort boundary: report the error but return the (possibly
        # empty) results so callers keep their existing behaviour.
        print(f"Error querying price data: {e}")

    return buy_results, sell_results