Mirror of https://github.com/Xe138/AI-Trader.git, synced 2026-04-02 01:27:24 -04:00.
Changed merge_jsonl.py to use os.getcwd() instead of os.path.dirname(__file__) so that merged.jsonl is written to the working directory where the data files exist, not to the script's installation directory.

Root cause:
- The Dockerfile copies scripts to /app/scripts/ for volume compatibility.
- entrypoint.sh runs: cd /app/data && python /app/scripts/merge_jsonl.py
- The old logic used the script directory (/app/scripts/), ignoring the working directory.
- This caused merged.jsonl to be created in /app/scripts/ instead of /app/data/.
- Since /app/data/ is volume-mounted, the merged file was not visible to the host.

Solution:
- Scripts now respect the current working directory (Unix philosophy).
- Works correctly with volume mounts and script relocation.
- Tested in both the local and the Docker directory-structure scenarios.

Fixes the issue where merged.jsonl was missing from the mounted data volume.
69 lines · 3.2 KiB · Python
import glob
import json
import os
# NASDAQ-100 constituent tickers (101 entries: GOOG and GOOGL are both listed).
# Used below to decide which daily_price files participate in the merge.
all_nasdaq_100_symbols = (
    "NVDA MSFT AAPL GOOG GOOGL AMZN META AVGO TSLA "
    "NFLX PLTR COST ASML AMD CSCO AZN TMUS MU LIN "
    "PEP SHOP APP INTU AMAT LRCX PDD QCOM ARM INTC "
    "BKNG AMGN TXN ISRG GILD KLAC PANW ADBE HON "
    "CRWD CEG ADI ADP DASH CMCSA VRTX MELI SBUX "
    "CDNS ORLY SNPS MSTR MDLZ ABNB MRVL CTAS TRI "
    "MAR MNST CSX ADSK PYPL FTNT AEP WDAY REGN ROP "
    "NXPI DDOG AXON ROST IDXX EA PCAR FAST EXC TTWO "
    "XEL ZS PAYX WBD BKR CPRT CCEP FANG TEAM CHTR "
    "KDP MCHP GEHC VRSK CTSH CSGP KHC ODFL DXCM TTD "
    "ON BIIB LULU CDW GFS"
).split()
# Merge every daily_price*.json file in the current working directory into a
# single merged.jsonl (one JSON document per line).
#
# Paths are resolved against os.getcwd() rather than the script's own
# directory so that, under Docker, the output lands in the volume-mounted
# data directory the entrypoint cd'ed into.

# Replacement value for "Meta Data" -> "1. Information" after key renaming.
RENAMED_INFO = "Daily Prices (buy price, high, low, sell price) and Volumes"


def find_time_series(data):
    """Return the first value whose key starts with "Time Series", or None."""
    for key, value in data.items():
        if key.startswith("Time Series"):
            return value
    return None


def rewrite_prices(data):
    """Rename OHLC keys in-place and trim the latest bar to its buy price.

    On every daily bar: "1. open" -> "1. buy price" and
    "4. close" -> "4. sell price".  The most recent date (lexicographic max of
    the ISO-formatted date keys) keeps only "1. buy price" — or becomes an
    empty dict when no buy price is present.  When a series was rewritten,
    "Meta Data" -> "1. Information" is updated to describe the new key names.

    Deliberately best-effort: any structurally unexpected record is left
    unmodified so it can still be written out as-is.

    Returns the (mutated) ``data`` dict for convenience.
    """
    try:
        series = find_time_series(data)
        if isinstance(series, dict) and series:
            # First pass: rename the open/close keys on every bar.
            for bar in list(series.values()):
                if not isinstance(bar, dict):
                    continue
                if "1. open" in bar:
                    bar["1. buy price"] = bar.pop("1. open")
                if "4. close" in bar:
                    bar["4. sell price"] = bar.pop("4. close")
            # Second pass: the latest day only keeps the buy price, because
            # the sell (close) price is not yet final for the current day.
            latest_date = max(series.keys())
            latest_bar = series.get(latest_date, {})
            if isinstance(latest_bar, dict):
                buy_val = latest_bar.get("1. buy price")
                series[latest_date] = (
                    {"1. buy price": buy_val} if buy_val is not None else {}
                )
            # Reflect the renamed fields in the metadata description.
            meta = data.get("Meta Data", {})
            if isinstance(meta, dict):
                meta["1. Information"] = RENAMED_INFO
    except Exception:
        # Malformed structure: keep the record untouched (written verbatim).
        pass
    return data


def matches_symbol(basename):
    """True when the file name mentions any NASDAQ-100 symbol.

    NOTE(review): this is a plain substring test, so short tickers such as
    "ON" or "EA" can match unrelated file names — confirm the daily_price
    file naming convention before relying on this filter.
    """
    return any(symbol in basename for symbol in all_nasdaq_100_symbols)


def main():
    """Merge matching daily_price*.json files into merged.jsonl (cwd)."""
    # Use the current working directory, not the script directory, for
    # Docker volume-mount compatibility.
    work_dir = os.getcwd()
    files = sorted(glob.glob(os.path.join(work_dir, 'daily_price*.json')))
    output_file = os.path.join(work_dir, 'merged.jsonl')

    with open(output_file, 'w', encoding='utf-8') as fout:
        for fp in files:
            # Only include files named after a NASDAQ-100 constituent.
            if not matches_symbol(os.path.basename(fp)):
                continue
            with open(fp, 'r', encoding='utf-8') as f:
                data = json.load(f)
            rewrite_prices(data)
            fout.write(json.dumps(data, ensure_ascii=False) + "\n")


main()