Files
AI-Trader/data/merge_jsonl.py
2025-10-28 11:32:54 +08:00

68 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import os
import glob
# Ticker symbols treated as NASDAQ-100 constituents by this script (101
# entries; GOOG and GOOGL are listed separately). Only price files whose
# name refers to one of these symbols are merged.
all_nasdaq_100_symbols = [
    "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO",
    "TSLA", "NFLX", "PLTR", "COST", "ASML", "AMD", "CSCO", "AZN",
    "TMUS", "MU", "LIN", "PEP", "SHOP", "APP", "INTU", "AMAT",
    "LRCX", "PDD", "QCOM", "ARM", "INTC", "BKNG", "AMGN", "TXN",
    "ISRG", "GILD", "KLAC", "PANW", "ADBE", "HON", "CRWD", "CEG",
    "ADI", "ADP", "DASH", "CMCSA", "VRTX", "MELI", "SBUX", "CDNS",
    "ORLY", "SNPS", "MSTR", "MDLZ", "ABNB", "MRVL", "CTAS", "TRI",
    "MAR", "MNST", "CSX", "ADSK", "PYPL", "FTNT", "AEP", "WDAY",
    "REGN", "ROP", "NXPI", "DDOG", "AXON", "ROST", "IDXX", "EA",
    "PCAR", "FAST", "EXC", "TTWO", "XEL", "ZS", "PAYX", "WBD",
    "BKR", "CPRT", "CCEP", "FANG", "TEAM", "CHTR", "KDP", "MCHP",
    "GEHC", "VRSK", "CTSH", "CSGP", "KHC", "ODFL", "DXCM", "TTD",
    "ON", "BIIB", "LULU", "CDW", "GFS",
]
# Merge every daily_price*.json file that belongs to a NASDAQ-100 constituent
# into merged.jsonl, writing one JSON document per line.


def _names_tracked_symbol(basename, symbols):
    """Return True if a whole token of *basename* equals one of *symbols*.

    The extension is dropped and the remaining stem is split on every
    non-alphanumeric character, so "daily_prices_NVDA.json" yields the
    tokens "daily", "prices" and "NVDA". Tokens are compared for equality
    (case-sensitively) rather than by substring search; a substring search
    would let short tickers such as "ON" or "EA" match inside unrelated
    names like "daily_prices_CONL.json".

    Args:
        basename: File name without any directory component.
        symbols: Container of ticker strings supporting ``in``.

    Returns:
        True when at least one token of the stem is in *symbols*.
    """
    stem = os.path.splitext(basename)[0]
    tokens = "".join(ch if ch.isalnum() else " " for ch in stem).split()
    return any(token in symbols for token in tokens)


def _relabel_price_fields(data):
    """Rename open/close fields to buy/sell price and trim the latest bar.

    Works in place on the first value of *data* whose key starts with
    "Time Series":

    * in every bar, "1. open" becomes "1. buy price" and "4. close"
      becomes "4. sell price";
    * the bar with the greatest date key keeps only "1. buy price"
      (or becomes empty when it has no such field);
    * "Meta Data" / "1. Information" is updated to describe the new names
      when a "Meta Data" dict is present.

    Payloads that are not a dict, or that have no non-empty time-series
    dict, are returned untouched so that they are written out as-is.

    Args:
        data: Object decoded from one price JSON file.

    Returns:
        The same *data* object, modified in place when applicable.
    """
    if not isinstance(data, dict):
        return data

    series = next(
        (value for key, value in data.items() if key.startswith("Time Series")),
        None,
    )
    if not isinstance(series, dict) or not series:
        return data

    # First rename the fields for every date.
    for bar in series.values():
        if not isinstance(bar, dict):
            continue
        if "1. open" in bar:
            bar["1. buy price"] = bar.pop("1. open")
        if "4. close" in bar:
            bar["4. sell price"] = bar.pop("4. close")

    # Then reduce the most recent date to its buy price only. Date keys are
    # compared as strings, so the greatest key is taken as the latest day.
    latest_date = max(series)
    latest_bar = series[latest_date]
    if isinstance(latest_bar, dict):
        buy_val = latest_bar.get("1. buy price")
        series[latest_date] = (
            {"1. buy price": buy_val} if buy_val is not None else {}
        )

    # Keep the metadata description in line with the renamed fields.
    meta = data.get("Meta Data")
    if isinstance(meta, dict):
        meta["1. Information"] = "Daily Prices (buy price, high, low, sell price) and Volumes"
    return data


current_dir = os.path.dirname(__file__)
pattern = os.path.join(current_dir, 'daily_price*.json')
files = sorted(glob.glob(pattern))
output_file = os.path.join(current_dir, 'merged.jsonl')

# Build the lookup set once instead of scanning the list for every file.
_tracked_symbols = frozenset(all_nasdaq_100_symbols)

with open(output_file, 'w', encoding='utf-8') as fout:
    for fp in files:
        # Only files named after a NASDAQ-100 constituent are merged.
        if not _names_tracked_symbol(os.path.basename(fp), _tracked_symbols):
            continue
        with open(fp, 'r', encoding='utf-8') as f:
            data = json.load(f)
        fout.write(json.dumps(_relabel_price_fields(data), ensure_ascii=False) + "\n")