"""Merge per-symbol daily price JSON files into one JSON Lines file.

Every ``daily_price*.json`` file located next to this script whose file name
carries a NASDAQ-100 ticker is loaded, normalised and written as a single
line of ``merged.jsonl`` (one input file per output line).

Normalisation applied to each payload:

* ``"1. open"`` is renamed to ``"1. buy price"`` and ``"4. close"`` to
  ``"4. sell price"`` in every daily bar;
* the most recent bar is reduced to its buy price only;
* the ``"1. Information"`` entry of ``"Meta Data"`` is rewritten to match the
  renamed fields.
"""

import glob
import json
import os
import re

all_nasdaq_100_symbols = [
    "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO", "TSLA",
    "NFLX", "PLTR", "COST", "ASML", "AMD", "CSCO", "AZN", "TMUS", "MU", "LIN",
    "PEP", "SHOP", "APP", "INTU", "AMAT", "LRCX", "PDD", "QCOM", "ARM", "INTC",
    "BKNG", "AMGN", "TXN", "ISRG", "GILD", "KLAC", "PANW", "ADBE", "HON",
    "CRWD", "CEG", "ADI", "ADP", "DASH", "CMCSA", "VRTX", "MELI", "SBUX",
    "CDNS", "ORLY", "SNPS", "MSTR", "MDLZ", "ABNB", "MRVL", "CTAS", "TRI",
    "MAR", "MNST", "CSX", "ADSK", "PYPL", "FTNT", "AEP", "WDAY", "REGN", "ROP",
    "NXPI", "DDOG", "AXON", "ROST", "IDXX", "EA", "PCAR", "FAST", "EXC",
    "TTWO", "XEL", "ZS", "PAYX", "WBD", "BKR", "CPRT", "CCEP", "FANG", "TEAM",
    "CHTR", "KDP", "MCHP", "GEHC", "VRSK", "CTSH", "CSGP", "KHC", "ODFL",
    "DXCM", "TTD", "ON", "BIIB", "LULU", "CDW", "GFS",
]

# Built once so each file-name check is a set lookup instead of a list scan.
_SYMBOL_SET = frozenset(all_nasdaq_100_symbols)

# A ticker candidate is a maximal run of uppercase letters. Every symbol in
# the list above has that shape; a symbol containing digits or punctuation
# would need a different tokenisation.
_TICKER_TOKEN = re.compile(r"[A-Z]+")

# (old key, new key) pairs applied to every daily bar, in this order.
_FIELD_RENAMES = (
    ("1. open", "1. buy price"),
    ("4. close", "4. sell price"),
)

_META_DESCRIPTION = "Daily Prices (buy price, high, low, sell price) and Volumes"


def is_nasdaq_100_file(basename):
    """Return True when *basename* names a NASDAQ-100 constituent.

    The name is split into runs of uppercase letters and each run is compared
    against the symbol list as a whole token. Plain substring matching would
    also accept unrelated tickers that merely contain a constituent (for
    example "ONON" contains "ON" and "NEA" contains "EA").
    """
    stem = os.path.splitext(basename)[0]
    return any(token in _SYMBOL_SET for token in _TICKER_TOKEN.findall(stem))


def transform_price_payload(data):
    """Rename the price fields of a decoded payload in place and return it.

    *data* is expected to be the result of ``json.load``. The first entry
    whose key starts with ``"Time Series"`` is treated as the mapping of
    date -> daily bar. Payloads that do not have that shape (not a dict, no
    such key, or an empty / non-dict series) are returned untouched so they
    can still be written out as-is.
    """
    if not isinstance(data, dict):
        return data

    series = next(
        (value for key, value in data.items() if key.startswith("Time Series")),
        None,
    )
    if not isinstance(series, dict) or not series:
        return data

    # Pop-and-reinsert (rather than rebuilding each bar) keeps the key order
    # of the serialised output identical to what this script always produced.
    for bar in series.values():
        if not isinstance(bar, dict):
            continue
        for old_key, new_key in _FIELD_RENAMES:
            if old_key in bar:
                bar[new_key] = bar.pop(old_key)

    # The latest date is the greatest key under string comparison, which is
    # the chronological maximum only for sortable date keys such as
    # YYYY-MM-DD. That bar keeps nothing but its buy price.
    latest_date = max(series)
    latest_bar = series[latest_date]
    if isinstance(latest_bar, dict):
        buy_price = latest_bar.get("1. buy price")
        series[latest_date] = {} if buy_price is None else {"1. buy price": buy_price}

    # Keep the metadata description in sync with the renamed fields.
    meta = data.get("Meta Data")
    if isinstance(meta, dict):
        meta["1. Information"] = _META_DESCRIPTION

    return data


def merge_daily_prices(source_dir, output_file=None):
    """Merge the NASDAQ-100 ``daily_price*.json`` files found in *source_dir*.

    Files are processed in sorted path order; each one becomes exactly one
    line of *output_file* (default: ``merged.jsonl`` inside *source_dir*).
    The output file is truncated before any input is read.

    Returns the number of lines written. An input that is not valid JSON
    raises ``json.JSONDecodeError`` and aborts the merge.
    """
    if output_file is None:
        output_file = os.path.join(source_dir, "merged.jsonl")

    # Escape the directory part so characters such as "[" in the path are not
    # interpreted as glob wildcards.
    pattern = os.path.join(glob.escape(source_dir), "daily_price*.json")

    written = 0
    with open(output_file, "w", encoding="utf-8") as fout:
        for path in sorted(glob.glob(pattern)):
            if not is_nasdaq_100_file(os.path.basename(path)):
                continue
            with open(path, "r", encoding="utf-8") as fin:
                data = json.load(fin)
            line = json.dumps(transform_price_payload(data), ensure_ascii=False)
            fout.write(line + "\n")
            written += 1
    return written


# The merge runs at module level (no ``__main__`` guard) on purpose: the
# script has always executed on import as well as when run directly.
current_dir = os.path.dirname(__file__)
output_file = os.path.join(current_dir, "merged.jsonl")
merge_daily_prices(current_dir, output_file)