mirror of
https://github.com/Xe138/AI-Trader.git
synced 2026-04-07 11:17:25 -04:00
fix: cleanup stale jobs on container restart to unblock new job creation
When a Docker container is shut down and restarted, jobs with status 'pending', 'downloading_data', or 'running' remained in the database, preventing new jobs from starting due to concurrency control checks.

This commit adds automatic cleanup of stale jobs during FastAPI startup:
- New cleanup_stale_jobs() method in JobManager (api/job_manager.py:702-779)
- Integrated into FastAPI lifespan startup (api/main.py:164-168)
- Intelligent status determination based on completion percentage:
  - 'partial' if any model-days completed (preserves progress data)
  - 'failed' if no progress made
- Detailed error messages with original status and completion counts
- Marks incomplete job_details as 'failed' with clear error messages
- Deployment-aware: skips cleanup in DEV mode when DB is reset
- Comprehensive logging at warning level for visibility

Testing:
- 6 new unit tests covering all cleanup scenarios (451-609)
- All 30 existing job_manager tests still pass
- Tests verify pending, running, downloading_data, partial progress, no stale jobs, and multiple stale jobs scenarios

Resolves issue where container restarts left stale jobs blocking the can_start_new_job() concurrency check.
This commit is contained in:
16
api/main.py
16
api/main.py
@@ -134,25 +134,39 @@ def create_app(
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Initialize database on startup, cleanup on shutdown if needed"""
|
||||
from tools.deployment_config import is_dev_mode, get_db_path
|
||||
from tools.deployment_config import is_dev_mode, get_db_path, should_preserve_dev_data
|
||||
from api.database import initialize_dev_database, initialize_database
|
||||
|
||||
# Startup - use closure to access db_path from create_app scope
|
||||
logger.info("🚀 FastAPI application starting...")
|
||||
logger.info("📊 Initializing database...")
|
||||
|
||||
should_cleanup_stale_jobs = False
|
||||
|
||||
if is_dev_mode():
|
||||
# Initialize dev database (reset unless PRESERVE_DEV_DATA=true)
|
||||
logger.info(" 🔧 DEV mode detected - initializing dev database")
|
||||
dev_db_path = get_db_path(db_path)
|
||||
initialize_dev_database(dev_db_path)
|
||||
log_dev_mode_startup_warning()
|
||||
|
||||
# Only cleanup stale jobs if preserving dev data (otherwise DB is fresh)
|
||||
if should_preserve_dev_data():
|
||||
should_cleanup_stale_jobs = True
|
||||
else:
|
||||
# Ensure production database schema exists
|
||||
logger.info(" 🏭 PROD mode - ensuring database schema exists")
|
||||
initialize_database(db_path)
|
||||
should_cleanup_stale_jobs = True
|
||||
|
||||
logger.info("✅ Database initialized")
|
||||
|
||||
# Clean up stale jobs from previous container session
|
||||
if should_cleanup_stale_jobs:
|
||||
logger.info("🧹 Checking for stale jobs from previous session...")
|
||||
job_manager = JobManager(get_db_path(db_path) if is_dev_mode() else db_path)
|
||||
job_manager.cleanup_stale_jobs()
|
||||
|
||||
logger.info("🌐 API server ready to accept requests")
|
||||
|
||||
yield
|
||||
|
||||
Reference in New Issue
Block a user