"""Watchdog: an APScheduler BackgroundScheduler that scans the schedules table.

The scan runs every 3 minutes.

Duplicate-run protection: each due schedule is claimed with an atomic
``UPDATE ... SET status='Going' WHERE status='Waiting'``. If that statement
affects 0 rows, the schedule was already claimed elsewhere and is skipped.
"""
import logging
from datetime import datetime, timezone

from apscheduler.schedulers.background import BackgroundScheduler
from croniter import croniter
from sqlalchemy import update
from sqlalchemy.orm import Session

from app.core.database import SessionLocal
from app.models.schedule import Schedule, ScheduleLog

logger = logging.getLogger(__name__)

_scheduler = BackgroundScheduler(timezone="Asia/Taipei")


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Replaces the deprecated ``datetime.utcnow()``. The tzinfo is stripped so
    the value is interchangeable with what this module produced before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _compute_next_run(cron_timer, schedule_name):
    """Return the next fire time for ``cron_timer``, or ``None`` if it is unusable.

    NOTE(review): the cron expression is evaluated against naive UTC "now",
    while the scheduler itself is configured for Asia/Taipei -- confirm that
    cron expressions are meant to be interpreted in UTC.
    """
    try:
        return croniter(cron_timer, _utcnow()).get_next(datetime)
    except Exception:
        # A NULL next_run_at never satisfies the ``next_run_at <= now`` filter,
        # so the schedule stops firing; make that visible in the logs.
        logger.exception(
            "Schedule %s: cannot compute next run from cron %r; next_run_at cleared",
            schedule_name,
            cron_timer,
        )
        return None


def _release_schedule(db: Session, schedule_id, schedule_name, cron_timer, final_status):
    """Put a claimed schedule back to 'Waiting' and record the run outcome.

    Only issues the UPDATE; the caller is responsible for committing.
    """
    db.execute(
        update(Schedule)
        .where(Schedule.id == schedule_id)
        .values(
            status="Waiting",
            last_run_at=_utcnow(),
            next_run_at=_compute_next_run(cron_timer, schedule_name),
            last_status=final_status,
        )
    )


def _run_claimed_schedule(db: Session, schedule_id, schedule_name, cron_timer):
    """Run one already-claimed schedule and persist its log row and new state."""
    log = ScheduleLog(
        schedule_id=schedule_id,
        schedule_name=schedule_name,
        started_at=_utcnow(),
        status="running",
    )
    db.add(log)
    db.commit()
    db.refresh(log)
    log_id = log.id

    try:
        # Imported lazily, as in the original code (presumably to avoid an
        # import cycle with the runner module -- TODO confirm).
        from app.services.scheduler.runner import dispatch_schedule

        dispatch_schedule(schedule_id, log_id, db)
        final_status = "ok"
    except Exception as e:
        logger.exception("Schedule %s failed: %s", schedule_name, e)
        # The runner shares this session. Discard whatever it left pending so
        # the bookkeeping below runs on a clean transaction instead of failing
        # and leaving the schedule stuck in 'Going'.
        db.rollback()
        final_status = "error"

    # Close the log row and release the schedule in a single commit.
    log.ended_at = _utcnow()
    log.status = final_status
    _release_schedule(db, schedule_id, schedule_name, cron_timer, final_status)
    db.commit()


def _watchdog_tick():
    """Scan for due schedules, claim each one atomically, and run it.

    A schedule is due when its status is 'Waiting' and ``next_run_at`` is not
    later than the current (naive UTC) time. Exceptions are logged and never
    propagate out of this function; the session is always closed.
    """
    db: Session = SessionLocal()
    try:
        due_rows = (
            db.query(Schedule)
            .filter(
                Schedule.status == "Waiting",
                Schedule.next_run_at <= _utcnow(),
            )
            .all()
        )
        # Snapshot plain values up front: ORM attributes are expired by every
        # commit/rollback, and re-loading them inside an error path could
        # itself raise.
        due = [(row.id, row.name, row.cron_timer) for row in due_rows]

        for schedule_id, schedule_name, cron_timer in due:
            # Atomic lock: only the UPDATE that still sees 'Waiting' wins.
            claimed = db.execute(
                update(Schedule)
                .where(Schedule.id == schedule_id, Schedule.status == "Waiting")
                .values(status="Going")
            ).rowcount
            db.commit()
            if claimed == 0:
                # Another process already grabbed it.
                continue

            try:
                _run_claimed_schedule(db, schedule_id, schedule_name, cron_timer)
            except Exception:
                # Bookkeeping around the run failed. Release the claim so the
                # schedule is not left in 'Going', then continue with the
                # remaining due schedules.
                logger.exception(
                    "Schedule %s: run bookkeeping failed; releasing schedule",
                    schedule_name,
                )
                db.rollback()
                _release_schedule(db, schedule_id, schedule_name, cron_timer, "error")
                db.commit()
    except Exception as e:
        logger.exception("Watchdog tick error: %s", e)
        db.rollback()
    finally:
        db.close()


def start_watchdog():
    """Register the 3-minute watchdog job and start the scheduler.

    Safe to call more than once: the job is replaced by id, and the scheduler
    is only started when it is not already running.
    """
    _scheduler.add_job(
        _watchdog_tick,
        trigger="interval",
        minutes=3,
        id="watchdog",
        replace_existing=True,
    )
    if _scheduler.running:
        logger.info("Watchdog scheduler already running")
        return
    _scheduler.start()
    logger.info("Watchdog scheduler started")


def stop_watchdog():
    """Shut the scheduler down without waiting for running jobs.

    Does nothing when the scheduler is not running.
    """
    if not _scheduler.running:
        return
    _scheduler.shutdown(wait=False)
    logger.info("Watchdog scheduler stopped")