"""Watchdog: an APScheduler BackgroundScheduler that scans the schedules table.

Every 3 minutes the watchdog looks for schedules whose status is 'Waiting'
and whose ``next_run_at`` is due, then runs them one by one.

Duplicate-run protection: each schedule is claimed with an atomic
``UPDATE ... SET status='Going' WHERE status='Waiting'``; when that statement
affects 0 rows another process already claimed it and the schedule is skipped.
"""
import logging
from datetime import datetime
from typing import Optional

from apscheduler.schedulers.background import BackgroundScheduler
from croniter import croniter
from sqlalchemy import update
from sqlalchemy.orm import Session

from app.core.database import SessionLocal
from app.core.utils import now_tw
from app.models.schedule import Schedule, ScheduleLog

logger = logging.getLogger(__name__)

# How often the watchdog job scans for due schedules.
_WATCHDOG_INTERVAL_MINUTES = 3

_scheduler = BackgroundScheduler(timezone="Asia/Taipei")


def _claim_schedule(db: Session, schedule_id) -> bool:
    """Atomically flip one schedule from 'Waiting' to 'Going'.

    Returns False when the UPDATE matched no row, i.e. another process
    claimed the schedule first.
    """
    affected = db.execute(
        update(Schedule)
        .where(Schedule.id == schedule_id, Schedule.status == "Waiting")
        .values(status="Going")
    ).rowcount
    db.commit()
    return affected != 0


def _next_run_at(schedule: Schedule, schedule_name: str) -> Optional[datetime]:
    """Return the next fire time after now for the schedule's cron expression.

    The expression is a 5-field cron (minute hour day month weekday).
    Returns None when it cannot be evaluated; the failure is logged because
    a schedule whose ``next_run_at`` is NULL no longer matches the due query.
    """
    try:
        return croniter(schedule.cron_timer, now_tw()).get_next(datetime)
    except Exception:
        logger.exception(
            "Could not compute next run for schedule %s; next_run_at will be cleared",
            schedule_name,
        )
        return None


def _finish_schedule(
    db: Session,
    schedule: Schedule,
    schedule_id,
    schedule_name: str,
    final_status: str,
    log: Optional[ScheduleLog],
) -> None:
    """Close the run log and put the schedule back to 'Waiting'.

    If writing the log fails, the lock release is retried on its own so the
    schedule does not stay in 'Going'. An error in that retry propagates to
    the caller.
    """
    release = (
        update(Schedule)
        .where(Schedule.id == schedule_id)
        .values(
            status="Waiting",
            last_run_at=now_tw(),
            next_run_at=_next_run_at(schedule, schedule_name),
            last_status=final_status,
        )
    )
    try:
        if log is not None:
            log.ended_at = now_tw()
            log.status = final_status
        db.execute(release)
        db.commit()
    except Exception:
        logger.exception(
            "Could not finalise run log for schedule %s; releasing lock only",
            schedule_name,
        )
        # Rollback discards the pending log changes, so only the release
        # statement is sent on the retry.
        db.rollback()
        db.execute(release)
        db.commit()


def _process_due_schedule(db: Session, schedule: Schedule) -> None:
    """Claim, run and release a single due schedule."""
    schedule_id = schedule.id
    if not _claim_schedule(db, schedule_id):
        # Another process already grabbed it.
        return

    # Read the name now: commits and rollbacks expire ORM attributes, and
    # reloading them inside an error handler could fail on a broken session.
    schedule_name = schedule.name
    final_status = "error"
    log = None
    try:
        log = ScheduleLog(
            schedule_id=schedule_id,
            schedule_name=schedule_name,
            started_at=now_tw(),
            status="running",
        )
        db.add(log)
        db.commit()
        db.refresh(log)
    except Exception:
        logger.exception("Could not create run log for schedule %s", schedule_name)
        db.rollback()
    else:
        try:
            # Imported lazily, as in the original code (presumably to avoid
            # a circular import at module load time).
            from app.services.scheduler.runner import dispatch_schedule

            dispatch_schedule(schedule_id, log.id, db)
            final_status = "ok"
        except Exception as e:
            logger.exception("Schedule %s failed: %s", schedule_name, e)
            # The runner shares this session; roll back whatever it left
            # uncommitted so the bookkeeping below can still be written.
            db.rollback()

    _finish_schedule(db, schedule, schedule_id, schedule_name, final_status, log)


def _watchdog_tick() -> None:
    """Run every schedule that is 'Waiting' and due, one after another.

    Opens its own session and always closes it. Errors that escape the
    per-schedule handling are logged and the session is rolled back; they
    never propagate to APScheduler.
    """
    db: Session = SessionLocal()
    try:
        due = (
            db.query(Schedule)
            .filter(
                Schedule.status == "Waiting",
                Schedule.next_run_at <= now_tw(),
            )
            .all()
        )
        for schedule in due:
            _process_due_schedule(db, schedule)
    except Exception as e:
        logger.exception("Watchdog tick error: %s", e)
        db.rollback()
    finally:
        db.close()


def start_watchdog() -> None:
    """Register the watchdog job and start the scheduler if it is not running."""
    _scheduler.add_job(
        _watchdog_tick,
        trigger="interval",
        minutes=_WATCHDOG_INTERVAL_MINUTES,
        id="watchdog",
        replace_existing=True,
    )
    if _scheduler.running:
        logger.info("Watchdog scheduler already running")
        return
    _scheduler.start()
    logger.info("Watchdog scheduler started")


def stop_watchdog() -> None:
    """Stop the scheduler without waiting for running jobs; no-op if not started."""
    if not _scheduler.running:
        logger.info("Watchdog scheduler not running")
        return
    _scheduler.shutdown(wait=False)
    logger.info("Watchdog scheduler stopped")