mirror of
https://github.com/Xe138/AI-Trader.git
synced 2026-04-01 17:17:24 -04:00
feat: add duplicate detection to job creation
- Skip already-completed model-day pairs in create_job() - Return warnings for skipped simulations - Raise error if all simulations are already completed - Update create_job() return type from str to Dict[str, Any] - Update all callers to handle new dict return type - Add comprehensive test coverage for duplicate detection - Log warnings when simulations are skipped
This commit is contained in:
@@ -56,7 +56,7 @@ class JobManager:
|
||||
date_range: List[str],
|
||||
models: List[str],
|
||||
model_day_filter: Optional[List[tuple]] = None
|
||||
) -> str:
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create new simulation job.
|
||||
|
||||
@@ -68,10 +68,12 @@ class JobManager:
|
||||
If None, creates job_details for all model-date combinations.
|
||||
|
||||
Returns:
|
||||
job_id: UUID of created job
|
||||
Dict with:
|
||||
- job_id: UUID of created job
|
||||
- warnings: List of warning messages for skipped simulations
|
||||
|
||||
Raises:
|
||||
ValueError: If another job is already running/pending
|
||||
ValueError: If another job is already running/pending or if all simulations are already completed
|
||||
"""
|
||||
if not self.can_start_new_job():
|
||||
raise ValueError("Another simulation job is already running or pending")
|
||||
@@ -83,6 +85,43 @@ class JobManager:
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# Determine which model-day pairs to check
|
||||
if model_day_filter is not None:
|
||||
pairs_to_check = model_day_filter
|
||||
else:
|
||||
pairs_to_check = [(model, date) for date in date_range for model in models]
|
||||
|
||||
# Check for already-completed simulations
|
||||
skipped_pairs = []
|
||||
pending_pairs = []
|
||||
|
||||
for model, date in pairs_to_check:
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*)
|
||||
FROM job_details
|
||||
WHERE model = ? AND date = ? AND status = 'completed'
|
||||
""", (model, date))
|
||||
|
||||
count = cursor.fetchone()[0]
|
||||
|
||||
if count > 0:
|
||||
skipped_pairs.append((model, date))
|
||||
logger.info(f"Skipping {model}/{date} - already completed in previous job")
|
||||
else:
|
||||
pending_pairs.append((model, date))
|
||||
|
||||
# If all simulations are already completed, raise error
|
||||
if len(pending_pairs) == 0:
|
||||
warnings = [
|
||||
f"Skipped {model}/{date} - already completed"
|
||||
for model, date in skipped_pairs
|
||||
]
|
||||
raise ValueError(
|
||||
f"All requested simulations are already completed. "
|
||||
f"Skipped {len(skipped_pairs)} model-day pair(s). "
|
||||
f"Details: {warnings}"
|
||||
)
|
||||
|
||||
# Insert job
|
||||
cursor.execute("""
|
||||
INSERT INTO jobs (
|
||||
@@ -98,34 +137,32 @@ class JobManager:
|
||||
created_at
|
||||
))
|
||||
|
||||
# Create job_details based on filter
|
||||
if model_day_filter is not None:
|
||||
# Only create job_details for specified model-day pairs
|
||||
for model, date in model_day_filter:
|
||||
cursor.execute("""
|
||||
INSERT INTO job_details (
|
||||
job_id, date, model, status
|
||||
)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""", (job_id, date, model, "pending"))
|
||||
# Create job_details only for pending pairs
|
||||
for model, date in pending_pairs:
|
||||
cursor.execute("""
|
||||
INSERT INTO job_details (
|
||||
job_id, date, model, status
|
||||
)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""", (job_id, date, model, "pending"))
|
||||
|
||||
logger.info(f"Created job {job_id} with {len(model_day_filter)} model-day tasks (filtered)")
|
||||
else:
|
||||
# Create job_details for all model-day combinations
|
||||
for date in date_range:
|
||||
for model in models:
|
||||
cursor.execute("""
|
||||
INSERT INTO job_details (
|
||||
job_id, date, model, status
|
||||
)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""", (job_id, date, model, "pending"))
|
||||
logger.info(f"Created job {job_id} with {len(pending_pairs)} model-day tasks")
|
||||
|
||||
logger.info(f"Created job {job_id} with {len(date_range)} dates and {len(models)} models")
|
||||
if skipped_pairs:
|
||||
logger.info(f"Skipped {len(skipped_pairs)} already-completed simulations")
|
||||
|
||||
conn.commit()
|
||||
|
||||
return job_id
|
||||
# Prepare warnings
|
||||
warnings = [
|
||||
f"Skipped {model}/{date} - already completed"
|
||||
for model, date in skipped_pairs
|
||||
]
|
||||
|
||||
return {
|
||||
"job_id": job_id,
|
||||
"warnings": warnings
|
||||
}
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
@@ -280,12 +280,18 @@ def create_app(
|
||||
|
||||
# Create job immediately with all requested dates
|
||||
# Worker will handle data download and filtering
|
||||
job_id = job_manager.create_job(
|
||||
result = job_manager.create_job(
|
||||
config_path=config_path,
|
||||
date_range=all_dates,
|
||||
models=models_to_run,
|
||||
model_day_filter=None # Worker will filter based on available data
|
||||
)
|
||||
job_id = result["job_id"]
|
||||
warnings = result.get("warnings", [])
|
||||
|
||||
# Log warnings if any simulations were skipped
|
||||
if warnings:
|
||||
logger.warning(f"Job {job_id} created with {len(warnings)} skipped simulations: {warnings}")
|
||||
|
||||
# Start worker in background thread (only if not in test mode)
|
||||
if not getattr(app.state, "test_mode", False):
|
||||
|
||||
Reference in New Issue
Block a user