mirror of
https://github.com/Xe138/AI-Trader.git
synced 2026-04-07 03:07:24 -04:00
init update
This commit is contained in:
62
data/merge_jsonl.py
Normal file
62
data/merge_jsonl.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json
|
||||
import os
|
||||
import glob
|
||||
|
||||
|
||||
# Ticker symbols whose daily-price files are allowed into the merged output.
all_nasdaq_100_symbols = [
    "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO", "TSLA",
    "NFLX", "PLTR", "COST", "ASML", "AMD", "CSCO", "AZN", "TMUS", "MU", "LIN",
    "PEP", "SHOP", "APP", "INTU", "AMAT", "LRCX", "PDD", "QCOM", "ARM", "INTC",
    "BKNG", "AMGN", "TXN", "ISRG", "GILD", "KLAC", "PANW", "ADBE", "HON",
    "CRWD", "CEG", "ADI", "ADP", "DASH", "CMCSA", "VRTX", "MELI", "SBUX",
    "CDNS", "ORLY", "SNPS", "MSTR", "MDLZ", "ABNB", "MRVL", "CTAS", "TRI",
    "MAR", "MNST", "CSX", "ADSK", "PYPL", "FTNT", "AEP", "WDAY", "REGN", "ROP",
    "NXPI", "DDOG", "AXON", "ROST", "IDXX", "EA", "PCAR", "FAST", "EXC", "TTWO",
    "XEL", "ZS", "PAYX", "WBD", "BKR", "CPRT", "CCEP", "FANG", "TEAM", "CHTR",
    "KDP", "MCHP", "GEHC", "VRSK", "CTSH", "CSGP", "KHC", "ODFL", "DXCM", "TTD",
    "ON", "BIIB", "LULU", "CDW", "GFS"
]

# Built once so that each filename check is a set lookup.
_KNOWN_SYMBOLS = frozenset(all_nasdaq_100_symbols)


def filename_matches_symbol(basename, symbols=None):
    """Return True if *basename* names one of the known ticker symbols.

    The file extension is dropped and the remaining stem is split on every
    non-alphanumeric character; the file matches when any resulting token is
    exactly equal to a symbol. A plain substring test is deliberately avoided
    because short tickers such as "ON" or "EA" occur inside unrelated names
    (for example "daily_prices_CONL.json").

    Args:
        basename: File name without directory, e.g. "daily_prices_NVDA.json".
        symbols: Optional iterable of symbols to match against; defaults to
            ``all_nasdaq_100_symbols``.
    """
    known = _KNOWN_SYMBOLS if symbols is None else frozenset(symbols)
    stem = os.path.splitext(basename)[0]
    tokens = "".join(ch if ch.isalnum() else " " for ch in stem).split()
    return any(token in known for token in tokens)


def relabel_daily_series(data):
    """Rename open/close to buy/sell price in *data* and trim the newest bar.

    For every bar under "Time Series (Daily)", "1. open" becomes
    "1. buy price" and "4. close" becomes "4. sell price". The bar with the
    greatest date key is then reduced to its "1. buy price" entry only (or to
    an empty dict when it has none). Finally the "1. Information" field of
    "Meta Data" is updated to describe the renamed fields.

    The payload is modified in place and also returned. Anything that does
    not have the expected shape is returned unchanged, so the caller can
    still write it out as-is.
    """
    if not isinstance(data, dict):
        return data

    series = data.get("Time Series (Daily)", {})
    if not isinstance(series, dict) or not series:
        return data

    # First rename the keys on every date.
    for bar in series.values():
        if not isinstance(bar, dict):
            continue
        if "1. open" in bar:
            bar["1. buy price"] = bar.pop("1. open")
        if "4. close" in bar:
            bar["4. sell price"] = bar.pop("4. close")

    # Then keep only the buy price for the latest date. Date keys are
    # compared as strings.
    latest_date = max(series)
    latest_bar = series[latest_date]
    if isinstance(latest_bar, dict):
        buy_val = latest_bar.get("1. buy price")
        series[latest_date] = {"1. buy price": buy_val} if buy_val is not None else {}

    # Update the Meta Data description.
    # NOTE(review): this is applied only when the series is non-empty; the
    # original nesting of this step could not be confirmed - verify.
    meta = data.get("Meta Data", {})
    if isinstance(meta, dict):
        meta["1. Information"] = "Daily Prices (buy price, high, low, sell price) and Volumes"

    return data


def merge_daily_prices(input_files, output_path):
    """Write one JSON line per accepted input file to *output_path*.

    Files whose base name does not match a known symbol are skipped. Each
    accepted file is parsed as JSON, passed through ``relabel_daily_series``
    and written as a single line. A file that is not valid JSON raises
    ``json.JSONDecodeError``.

    Returns:
        The number of lines written.
    """
    written = 0
    with open(output_path, 'w', encoding='utf-8') as fout:
        for fp in input_files:
            # Only files named after a known symbol are merged.
            if not filename_matches_symbol(os.path.basename(fp)):
                continue
            with open(fp, 'r', encoding='utf-8') as f:
                data = json.load(f)
            fout.write(json.dumps(relabel_daily_series(data), ensure_ascii=False) + "\n")
            written += 1
    return written


# Merge every JSON file starting with "daily_price" that sits next to this
# script into merged.jsonl, one file per line. This runs whenever the module
# is executed or imported.
current_dir = os.path.dirname(__file__)
pattern = os.path.join(current_dir, 'daily_price*.json')
files = sorted(glob.glob(pattern))

output_file = os.path.join(current_dir, 'merged.jsonl')

merge_daily_prices(files, output_file)
|
||||
Reference in New Issue
Block a user