feat(backend): Phase 1-4 全新開發完成,37/37 TDD 通過
[Phase 0 Reset]
- 清除舊版 app/、alembic/versions/、雜亂測試腳本
- 新 requirements.txt (移除 caldav/redis/keycloak-lib,加入 apscheduler/croniter/docker/paramiko/ping3/dnspython)
[Phase 1 資料庫]
- 9 張資料表 SQLAlchemy Models:tenants / accounts / schedules / schedule_logs /
tenant_schedule_results / account_schedule_results / servers / server_status_logs / system_status_logs
- Alembic migration 001_create_all_tables (已套用到 10.1.0.20:5433/virtual_mis)
- seed.py:schedules 初始 3 筆 / servers 初始 4 筆
[Phase 2 CRUD API]
- GET/POST/PUT/DELETE: /api/v1/tenants / accounts / servers / schedules
- /api/v1/system-status
- 帳號編碼自動產生 (prefix + seq_no 4碼左補0)
- 燈號 (lights) 從最新排程結果取得
[Phase 3 Watchdog]
- APScheduler interval 3分鐘,原子 UPDATE status=Going 防重複執行
- 手動觸發 API: POST /api/v1/schedules/{id}/run
[Phase 4 Service Clients]
- KeycloakClient:vmis-admin realm,REST API (不用 python-keycloak)
- MailClient:Docker Mailserver @ 10.1.0.254:8080,含 MX DNS 驗證
- DockerClient:docker-py 本機 + paramiko SSH 遠端 compose
- NextcloudClient:OCS API user/quota
- SystemChecker:功能驗證 (traefik routers>0 / keycloak token / SMTP EHLO / DB SELECT 1 / ping)
[TDD]
- 37 tests / 37 passed (2.11s)
- SQLite in-memory + StaticPool,無需外部 DB
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
107
backend/app/services/scheduler/watchdog.py
Normal file
107
backend/app/services/scheduler/watchdog.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
Watchdog: APScheduler BackgroundScheduler,每 3 分鐘掃描 schedules 表。
|
||||
防重複執行:原子 UPDATE status='Going',影響 0 筆則跳過。
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from croniter import croniter
|
||||
from sqlalchemy import update
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
from app.models.schedule import Schedule, ScheduleLog
|
||||
|
||||
# Logger named after this module.
logger = logging.getLogger(__name__)

# Background scheduler shared by start_watchdog() and stop_watchdog(),
# configured with the Asia/Taipei timezone.
_scheduler = BackgroundScheduler(timezone="Asia/Taipei")
|
||||
|
||||
|
||||
def _watchdog_tick():
    """Run one watchdog pass: claim every due schedule and dispatch it.

    Selects schedules whose status is "Waiting" and whose next_run_at is not
    later than the current naive-UTC time. Each one is claimed with a
    conditional UPDATE ("Waiting" -> "Going"); when that UPDATE touches no
    rows, another worker already took the schedule and it is skipped.

    For every claimed schedule a ScheduleLog row is written, the schedule is
    handed to dispatch_schedule, the log is closed with "ok" or "error",
    next_run_at is recomputed from the schedule's cron expression, and the
    schedule is put back to "Waiting".

    Any exception escaping the loop is logged and the session is rolled back.
    The session is always closed before returning.
    """
    db: Session = SessionLocal()
    try:
        # NOTE(review): timestamps here are naive UTC while the scheduler is
        # configured for Asia/Taipei -- confirm next_run_at is stored as UTC.
        due = (
            db.query(Schedule)
            .filter(
                Schedule.status == "Waiting",
                Schedule.next_run_at <= datetime.utcnow(),
            )
            .all()
        )
        for schedule in due:
            # Atomic lock: only one process wins
            affected = db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id, Schedule.status == "Waiting")
                .values(status="Going")
            ).rowcount
            db.commit()

            if affected == 0:
                # Another process already grabbed it
                continue

            log = ScheduleLog(
                schedule_id=schedule.id,
                schedule_name=schedule.name,
                started_at=datetime.utcnow(),
                status="running",
            )
            db.add(log)
            db.commit()
            db.refresh(log)

            try:
                # Imported here rather than at module top; an import failure
                # is therefore recorded as a failed run of this schedule.
                from app.services.scheduler.runner import dispatch_schedule

                dispatch_schedule(schedule.id, log.id, db)
                final_status = "ok"
            except Exception as e:
                # NOTE(review): if dispatch_schedule left the session in a
                # failed state, the writes below will raise and the schedule
                # stays "Going" -- confirm whether a rollback is wanted here.
                logger.exception("Schedule %s failed: %s", schedule.name, e)
                final_status = "error"

            # Update log
            log.ended_at = datetime.utcnow()
            log.status = final_status

            # Recalculate next_run_at
            try:
                cron = croniter(schedule.cron_timer, datetime.utcnow())
                next_run = cron.get_next(datetime)
            except Exception:
                # Still best-effort, but no longer silent: with a NULL
                # next_run_at the due filter above never matches this
                # schedule again, so leave a trace for the operator.
                logger.exception(
                    "Schedule %s: cannot compute next run from cron %r; "
                    "next_run_at will be NULL",
                    schedule.name,
                    schedule.cron_timer,
                )
                next_run = None

            # Reset schedule
            db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id)
                .values(
                    status="Waiting",
                    last_run_at=datetime.utcnow(),
                    next_run_at=next_run,
                    last_status=final_status,
                )
            )
            db.commit()

    except Exception as e:
        logger.exception("Watchdog tick error: %s", e)
        db.rollback()
    finally:
        db.close()
|
||||
|
||||
|
||||
def start_watchdog():
    """Register the watchdog tick as a 3-minute interval job and start the scheduler.

    The job is added under the id "watchdog" with replace_existing=True.
    """
    job_options = {
        "trigger": "interval",
        "minutes": 3,
        "id": "watchdog",
        "replace_existing": True,
    }
    _scheduler.add_job(_watchdog_tick, **job_options)
    _scheduler.start()
    logger.info("Watchdog scheduler started")
|
||||
|
||||
|
||||
def stop_watchdog():
    """Shut down the module-level scheduler without waiting, then log it."""
    wait_for_jobs = False
    _scheduler.shutdown(wait=wait_for_jobs)
    logger.info("Watchdog scheduler stopped")
|
||||
Reference in New Issue
Block a user