init update

This commit is contained in:
tianyufan
2025-10-24 00:35:21 +08:00
commit df5c25c98d
205 changed files with 81998 additions and 0 deletions

62
data/merge_jsonl.py Normal file
View File

@@ -0,0 +1,62 @@
import json
import os
import glob
# Ticker symbols whose daily-price files are allowed into the merged output.
all_nasdaq_100_symbols = [
    "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO", "TSLA",
    "NFLX", "PLTR", "COST", "ASML", "AMD", "CSCO", "AZN", "TMUS", "MU", "LIN",
    "PEP", "SHOP", "APP", "INTU", "AMAT", "LRCX", "PDD", "QCOM", "ARM", "INTC",
    "BKNG", "AMGN", "TXN", "ISRG", "GILD", "KLAC", "PANW", "ADBE", "HON",
    "CRWD", "CEG", "ADI", "ADP", "DASH", "CMCSA", "VRTX", "MELI", "SBUX",
    "CDNS", "ORLY", "SNPS", "MSTR", "MDLZ", "ABNB", "MRVL", "CTAS", "TRI",
    "MAR", "MNST", "CSX", "ADSK", "PYPL", "FTNT", "AEP", "WDAY", "REGN", "ROP",
    "NXPI", "DDOG", "AXON", "ROST", "IDXX", "EA", "PCAR", "FAST", "EXC", "TTWO",
    "XEL", "ZS", "PAYX", "WBD", "BKR", "CPRT", "CCEP", "FANG", "TEAM", "CHTR",
    "KDP", "MCHP", "GEHC", "VRSK", "CTSH", "CSGP", "KHC", "ODFL", "DXCM", "TTD",
    "ON", "BIIB", "LULU", "CDW", "GFS",
]

# Frozen copy of the symbols for O(1) exact-match lookups.
_SYMBOL_SET = frozenset(all_nasdaq_100_symbols)

# Per-day keys that are renamed, as (old key, new key) pairs.
_RENAMED_FIELDS = (
    ("1. open", "1. buy price"),
    ("4. close", "4. sell price"),
)

# Replacement text for the "1. Information" entry of "Meta Data".
_INFO_TEXT = "Daily Prices (buy price, high, low, sell price) and Volumes"


def _filename_tokens(basename):
    """Return the separator-delimited tokens that follow the file-name prefix.

    The extension and a leading ``daily_prices`` / ``daily_price`` prefix are
    removed, then the remainder is split on ``_``, ``-``, ``.`` and spaces.
    """
    stem = os.path.splitext(basename)[0]
    for prefix in ("daily_prices", "daily_price"):
        if stem.startswith(prefix):
            stem = stem[len(prefix):]
            break
    for separator in "-. ":
        stem = stem.replace(separator, "_")
    return [token for token in stem.split("_") if token]


def _is_nasdaq_100_file(basename):
    """Tell whether a file name carries one of the listed symbols as a whole token.

    Whole-token comparison is used instead of a substring test so that short
    symbols such as "ON" or "EA" cannot match inside an unrelated ticker
    (e.g. "SONY").
    """
    # Assumes the ticker is its own token, as in daily_prices_NVDA.json
    # -- TODO confirm against the naming used by the download step.
    return any(token in _SYMBOL_SET for token in _filename_tokens(basename))


def _rewrite_record(data):
    """Rename price keys in place and trim the most recent day; return ``data``.

    For every day in "Time Series (Daily)", "1. open" becomes "1. buy price"
    and "4. close" becomes "4. sell price".  The entry with the greatest date
    key is then reduced to its buy price only (or to an empty dict when it has
    none).  Anything that does not have the expected dict structure is
    returned untouched so that it is still written out as-is.
    """
    if not isinstance(data, dict):
        return data
    series = data.get("Time Series (Daily)", {})
    if not isinstance(series, dict) or not series:
        return data

    # Step 1: rename the keys for every date.  pop-then-assign moves the
    # renamed keys to the end of each bar, which fixes the serialized order.
    for bar in series.values():
        if not isinstance(bar, dict):
            continue
        for old_key, new_key in _RENAMED_FIELDS:
            if old_key in bar:
                bar[new_key] = bar.pop(old_key)

    # Step 2: the latest date keeps only its buy price.  Date keys are
    # compared as strings -- assumes ISO "YYYY-MM-DD" keys, TODO confirm.
    latest_date = max(series)
    latest_bar = series[latest_date]
    if isinstance(latest_bar, dict):
        buy_val = latest_bar.get("1. buy price")
        series[latest_date] = {"1. buy price": buy_val} if buy_val is not None else {}

    # Step 3: refresh the description so it names the renamed fields.
    # NOTE(review): only done when a non-empty series was rewritten --
    # confirm this scoping is the intended one.
    meta = data.get("Meta Data", {})
    if isinstance(meta, dict):
        meta["1. Information"] = _INFO_TEXT
    return data


def merge_daily_prices(data_dir, output_path=None):
    """Merge every ``daily_price*.json`` in ``data_dir`` into one JSONL file.

    Files are processed in sorted path order; only those whose name carries a
    symbol from ``all_nasdaq_100_symbols`` are used, and each contributes
    exactly one line to the output.

    Args:
        data_dir: Directory that is searched for ``daily_price*.json`` files.
        output_path: Destination file; defaults to ``merged.jsonl`` inside
            ``data_dir``.  It is truncated before writing.

    Returns:
        The number of lines written.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    if output_path is None:
        output_path = os.path.join(data_dir, 'merged.jsonl')
    # Escape the directory so glob metacharacters in the path are literal.
    pattern = os.path.join(glob.escape(data_dir), 'daily_price*.json')
    written = 0
    with open(output_path, 'w', encoding='utf-8') as fout:
        for fp in sorted(glob.glob(pattern)):
            if not _is_nasdaq_100_file(os.path.basename(fp)):
                continue
            with open(fp, 'r', encoding='utf-8') as f:
                data = json.load(f)
            fout.write(json.dumps(_rewrite_record(data), ensure_ascii=False) + "\n")
            written += 1
    return written


# Executed at module level: running (or importing) this file merges the
# daily_price*.json files that sit next to it into merged.jsonl.
current_dir = os.path.dirname(__file__)
output_file = os.path.join(current_dir, 'merged.jsonl')
merge_daily_prices(current_dir, output_file)