init update

This commit is contained in:
tianyufan
2025-10-24 00:35:21 +08:00
commit df5c25c98d
205 changed files with 81998 additions and 0 deletions

96
delete.py Normal file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
删除./data/agent_data/{}/log/{}下面的所有log.jsonl里每一条的"timestamp"字段
"""
import os
import json
import glob
from pathlib import Path
def remove_timestamp_from_log_files():
"""
遍历所有agent_data目录下的log.jsonl文件删除每一条记录中的timestamp字段
"""
base_path = Path("./data/agent_data")
if not base_path.exists():
print(f"错误:目录 {base_path} 不存在")
return
# 查找所有log.jsonl文件
log_files = list(base_path.glob("*/log/*/log.jsonl"))
if not log_files:
print("未找到任何log.jsonl文件")
return
print(f"找到 {len(log_files)} 个log.jsonl文件")
processed_files = 0
total_records = 0
modified_records = 0
for log_file in log_files:
print(f"处理文件: {log_file}")
try:
# 读取文件内容
with open(log_file, 'r', encoding='utf-8') as f:
lines = f.readlines()
if not lines:
print(f" 文件为空,跳过")
continue
# 处理每一行
modified_lines = []
file_modified = False
for line_num, line in enumerate(lines, 1):
line = line.strip()
if not line:
modified_lines.append(line)
continue
try:
# 解析JSON
data = json.loads(line)
total_records += 1
# 检查是否有timestamp字段
if 'timestamp' in data:
# 删除timestamp字段
del data['timestamp']
modified_records += 1
file_modified = True
print(f"{line_num}删除了timestamp字段")
# 重新序列化
modified_lines.append(json.dumps(data, ensure_ascii=False))
except json.JSONDecodeError as e:
print(f"{line_num}JSON解析错误 - {e}")
modified_lines.append(line) # 保持原样
# 如果有修改,写回文件
if file_modified:
with open(log_file, 'w', encoding='utf-8') as f:
for line in modified_lines:
f.write(line + '\n')
print(f" 文件已更新")
processed_files += 1
else:
print(f" 文件无需修改")
except Exception as e:
print(f" 处理文件时出错: {e}")
continue
print(f"\n处理完成!")
print(f"处理了 {processed_files} 个文件")
print(f"总共 {total_records} 条记录")
print(f"修改了 {modified_records} 条记录")
if __name__ == "__main__":
remove_timestamp_from_log_files()