diff --git a/api/job_manager.py b/api/job_manager.py
index 46d47ee..344063c 100644
--- a/api/job_manager.py
+++ b/api/job_manager.py
@@ -55,7 +55,8 @@ class JobManager:
         config_path: str,
         date_range: List[str],
         models: List[str],
-        model_day_filter: Optional[List[tuple]] = None
+        model_day_filter: Optional[List[tuple]] = None,
+        skip_completed: bool = True
     ) -> Dict[str, Any]:
         """
         Create new simulation job.
@@ -66,6 +67,8 @@ class JobManager:
             models: List of model signatures to execute
             model_day_filter: Optional list of (model, date) tuples to limit job_details.
                               If None, creates job_details for all model-date combinations.
+            skip_completed: If True (default), skips already-completed simulations.
+                            If False, includes all requested simulations regardless of completion status.
 
         Returns:
             Dict with:
@@ -73,7 +76,7 @@ class JobManager:
             - warnings: List of warning messages for skipped simulations
 
         Raises:
-            ValueError: If another job is already running/pending or if all simulations are already completed
+            ValueError: If another job is already running/pending or if all simulations are already completed (when skip_completed=True)
         """
         if not self.can_start_new_job():
             raise ValueError("Another simulation job is already running or pending")
@@ -91,36 +94,42 @@ class JobManager:
         else:
             pairs_to_check = [(model, date) for date in date_range for model in models]
 
-        # Check for already-completed simulations
+        # Check for already-completed simulations (only if skip_completed=True)
         skipped_pairs = []
         pending_pairs = []
 
-        for model, date in pairs_to_check:
-            cursor.execute("""
-                SELECT COUNT(*)
-                FROM job_details
-                WHERE model = ? AND date = ? AND status = 'completed'
-            """, (model, date))
+        if skip_completed:
+            # Perform duplicate checking
+            for model, date in pairs_to_check:
+                cursor.execute("""
+                    SELECT COUNT(*)
+                    FROM job_details
+                    WHERE model = ? AND date = ? AND status = 'completed'
+                """, (model, date))
 
-            count = cursor.fetchone()[0]
+                count = cursor.fetchone()[0]
 
-            if count > 0:
-                skipped_pairs.append((model, date))
-                logger.info(f"Skipping {model}/{date} - already completed in previous job")
-            else:
-                pending_pairs.append((model, date))
+                if count > 0:
+                    skipped_pairs.append((model, date))
+                    logger.info(f"Skipping {model}/{date} - already completed in previous job")
+                else:
+                    pending_pairs.append((model, date))
 
-        # If all simulations are already completed, raise error
-        if len(pending_pairs) == 0:
-            warnings = [
-                f"Skipped {model}/{date} - already completed"
-                for model, date in skipped_pairs
-            ]
-            raise ValueError(
-                f"All requested simulations are already completed. "
-                f"Skipped {len(skipped_pairs)} model-day pair(s). "
-                f"Details: {warnings}"
-            )
+            # If all simulations are already completed, raise error
+            if len(pending_pairs) == 0:
+                warnings = [
+                    f"Skipped {model}/{date} - already completed"
+                    for model, date in skipped_pairs
+                ]
+                raise ValueError(
+                    f"All requested simulations are already completed. "
+                    f"Skipped {len(skipped_pairs)} model-day pair(s). "
+                    f"Details: {warnings}"
+                )
+        else:
+            # skip_completed=False: include ALL pairs (no duplicate checking)
+            pending_pairs = pairs_to_check
+            logger.info(f"Including all {len(pending_pairs)} model-day pairs (skip_completed=False)")
 
         # Insert job
         cursor.execute("""
diff --git a/api/main.py b/api/main.py
index 0d0c923..5e71560 100644
--- a/api/main.py
+++ b/api/main.py
@@ -284,7 +284,8 @@ def create_app(
             config_path=config_path,
             date_range=all_dates,
             models=models_to_run,
-            model_day_filter=None  # Worker will filter based on available data
+            model_day_filter=None,  # Worker will filter based on available data
+            skip_completed=(not request.replace_existing)  # Skip if replace_existing=False
         )
         job_id = result["job_id"]
         warnings = result.get("warnings", [])
diff --git a/tests/unit/test_job_manager_duplicate_detection.py b/tests/unit/test_job_manager_duplicate_detection.py
index a373a86..08d23cb 100644
--- a/tests/unit/test_job_manager_duplicate_detection.py
+++ b/tests/unit/test_job_manager_duplicate_detection.py
@@ -211,3 +211,46 @@ def test_create_job_raises_error_when_all_simulations_completed(temp_db):
     error_message = str(exc_info.value)
     assert "All requested simulations are already completed" in error_message
     assert "Skipped 4 model-day pair(s)" in error_message
+
+
+def test_create_job_with_skip_completed_false_includes_all_simulations(temp_db):
+    """Test that skip_completed=False includes ALL simulations, even already-completed ones."""
+    manager = JobManager(db_path=temp_db)
+
+    # Create first job and complete some model-days
+    result_1 = manager.create_job(
+        config_path="test_config.json",
+        date_range=["2025-10-15", "2025-10-16"],
+        models=["model-a", "model-b"]
+    )
+    job_id_1 = result_1["job_id"]
+
+    # Mark all model-days as completed
+    manager.update_job_detail_status(job_id_1, "2025-10-15", "model-a", "completed")
+    manager.update_job_detail_status(job_id_1, "2025-10-15", "model-b", "completed")
+    manager.update_job_detail_status(job_id_1, "2025-10-16", "model-a", "completed")
+    manager.update_job_detail_status(job_id_1, "2025-10-16", "model-b", "completed")
+
+    # Create second job with skip_completed=False
+    result_2 = manager.create_job(
+        config_path="test_config.json",
+        date_range=["2025-10-15", "2025-10-16"],
+        models=["model-a", "model-b"],
+        skip_completed=False
+    )
+    job_id_2 = result_2["job_id"]
+
+    # Get job details for second job
+    details = manager.get_job_details(job_id_2)
+
+    # Should have ALL 4 model-day pairs (no skipping)
+    assert len(details) == 4
+
+    dates_models = [(d["date"], d["model"]) for d in details]
+    assert ("2025-10-15", "model-a") in dates_models
+    assert ("2025-10-15", "model-b") in dates_models
+    assert ("2025-10-16", "model-a") in dates_models
+    assert ("2025-10-16", "model-b") in dates_models
+
+    # Verify no warnings were returned
+    assert result_2.get("warnings") == []