feat(backend): Phase 1-4 全新開發完成,37/37 TDD 通過

[Phase 0 Reset]
- 清除舊版 app/、alembic/versions/、雜亂測試腳本
- 新 requirements.txt (移除 caldav/redis/keycloak-lib,加入 apscheduler/croniter/docker/paramiko/ping3/dnspython)

[Phase 1 資料庫]
- 9 張資料表 SQLAlchemy Models:tenants / accounts / schedules / schedule_logs /
  tenant_schedule_results / account_schedule_results / servers / server_status_logs / system_status_logs
- Alembic migration 001_create_all_tables (已套用到 10.1.0.20:5433/virtual_mis)
- seed.py:schedules 初始 3 筆 / servers 初始 4 筆

[Phase 2 CRUD API]
- GET/POST/PUT/DELETE: /api/v1/tenants / accounts / servers / schedules
- /api/v1/system-status
- 帳號編碼自動產生 (prefix + seq_no 4碼左補0)
- 燈號 (lights) 從最新排程結果取得

[Phase 3 Watchdog]
- APScheduler interval 3分鐘,原子 UPDATE status=Going 防重複執行
- 手動觸發 API: POST /api/v1/schedules/{id}/run

[Phase 4 Service Clients]
- KeycloakClient:vmis-admin realm,REST API (不用 python-keycloak)
- MailClient:Docker Mailserver @ 10.1.0.254:8080,含 MX DNS 驗證
- DockerClient:docker-py 本機 + paramiko SSH 遠端 compose
- NextcloudClient:OCS API user/quota
- SystemChecker:功能驗證 (traefik routers>0 / keycloak token / SMTP EHLO / DB SELECT 1 / ping)

[TDD]
- 37 tests / 37 passed (2.11s)
- SQLite in-memory + StaticPool,無需外部 DB

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
VMIS Developer
2026-03-14 13:10:15 +08:00
parent 22611f7f73
commit 42d1420f9c
52 changed files with 2934 additions and 0 deletions

0
backend/app/__init__.py Normal file
View File

View File

View File

View File

@@ -0,0 +1,118 @@
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from app.core.database import get_db
from app.models.account import Account
from app.models.tenant import Tenant
from app.models.result import AccountScheduleResult
from app.schemas.account import AccountCreate, AccountUpdate, AccountResponse, AccountStatusLight
router = APIRouter(prefix="/accounts", tags=["accounts"])
def _next_seq_no(db: Session, tenant_id: int) -> int:
    """Return the next per-tenant sequence number (1-based)."""
    row = (
        db.query(Account.seq_no)
        .filter(Account.tenant_id == tenant_id)
        .order_by(Account.seq_no.desc())
        .first()
    )
    if row is None:
        # No accounts yet for this tenant: start the counter at 1.
        return 1
    return row[0] + 1
def _build_account_code(prefix: str, seq_no: int) -> str:
return f"{prefix}{str(seq_no).zfill(4)}"
def _get_lights(db: Session, account_id: int) -> Optional[AccountStatusLight]:
    """Latest schedule-result lights for one account; None if never checked."""
    latest = (
        db.query(AccountScheduleResult)
        .filter(AccountScheduleResult.account_id == account_id)
        .order_by(AccountScheduleResult.recorded_at.desc())
        .first()
    )
    if latest is None:
        return None
    return AccountStatusLight(
        sso_result=latest.sso_result,
        mailbox_result=latest.mailbox_result,
        nc_result=latest.nc_result,
        quota_usage=latest.quota_usage,
    )
@router.get("", response_model=List[AccountResponse])
def list_accounts(
    tenant_id: Optional[int] = Query(None),
    is_active: Optional[bool] = Query(None),
    db: Session = Depends(get_db),
):
    """List accounts, optionally filtered by tenant and/or active flag.

    Each row is enriched with the tenant name and the latest schedule-result
    lights. NOTE(review): one extra query per account for the lights (N+1);
    acceptable for small account counts, consider a join if this grows.
    """
    q = db.query(Account)
    if tenant_id is not None:
        q = q.filter(Account.tenant_id == tenant_id)
    if is_active is not None:
        q = q.filter(Account.is_active == is_active)
    accounts = q.order_by(Account.id).all()
    result = []
    for a in accounts:
        resp = AccountResponse.model_validate(a)
        # a.tenant is lazy-loaded per row; tolerate a missing tenant.
        resp.tenant_name = a.tenant.name if a.tenant else None
        resp.lights = _get_lights(db, a.id)
        result.append(resp)
    return result
@router.post("", response_model=AccountResponse, status_code=201)
def create_account(payload: AccountCreate, db: Session = Depends(get_db)):
    """Create an account under a tenant.

    Server-side derives: seq_no (per-tenant counter), account_code
    (prefix + zero-padded seq_no) and email (sso_account@tenant.domain).
    Raises 404 for an unknown tenant, 409 on a code/uuid collision.
    """
    from sqlalchemy.exc import IntegrityError  # sqlalchemy already a file dep

    tenant = db.get(Tenant, payload.tenant_id)
    if not tenant:
        raise HTTPException(status_code=404, detail="Tenant not found")
    seq_no = _next_seq_no(db, payload.tenant_id)
    account_code = _build_account_code(tenant.prefix, seq_no)
    email = f"{payload.sso_account}@{tenant.domain}"
    account = Account(
        **payload.model_dump(),
        seq_no=seq_no,
        account_code=account_code,
        email=email,
    )
    db.add(account)
    try:
        db.commit()
    except IntegrityError as exc:
        # Two concurrent creates can compute the same seq_no and collide on
        # the unique account_code; surface that as 409 instead of a 500.
        db.rollback()
        raise HTTPException(status_code=409, detail="Account code conflict") from exc
    db.refresh(account)
    resp = AccountResponse.model_validate(account)
    resp.tenant_name = tenant.name
    # A brand-new account has no schedule results yet.
    resp.lights = None
    return resp
@router.get("/{account_id}", response_model=AccountResponse)
def get_account(account_id: int, db: Session = Depends(get_db)):
    """Fetch one account with tenant name and latest lights; 404 if missing."""
    account = db.get(Account, account_id)
    if not account:
        raise HTTPException(status_code=404, detail="Account not found")
    resp = AccountResponse.model_validate(account)
    resp.tenant_name = account.tenant.name if account.tenant else None
    resp.lights = _get_lights(db, account_id)
    return resp
@router.put("/{account_id}", response_model=AccountResponse)
def update_account(account_id: int, payload: AccountUpdate, db: Session = Depends(get_db)):
    """Partial update of an account; only non-null payload fields are applied.

    NOTE(review): exclude_none means a client cannot clear an optional field
    back to NULL via this endpoint — confirm whether that is intended.
    """
    account = db.get(Account, account_id)
    if not account:
        raise HTTPException(status_code=404, detail="Account not found")
    for field, value in payload.model_dump(exclude_none=True).items():
        setattr(account, field, value)
    db.commit()
    db.refresh(account)
    resp = AccountResponse.model_validate(account)
    resp.tenant_name = account.tenant.name if account.tenant else None
    resp.lights = _get_lights(db, account_id)
    return resp
@router.delete("/{account_id}", status_code=204)
def delete_account(account_id: int, db: Session = Depends(get_db)):
    """Delete an account; 404 when the id is unknown. Returns no body."""
    target = db.get(Account, account_id)
    if target is None:
        raise HTTPException(status_code=404, detail="Account not found")
    db.delete(target)
    db.commit()

View File

@@ -0,0 +1,9 @@
from fastapi import APIRouter
from app.api.v1 import tenants, accounts, schedules, servers, status
# Aggregate all v1 routers; mounted in main.py under the /api/v1 prefix.
api_router = APIRouter()
api_router.include_router(tenants.router)
api_router.include_router(accounts.router)
api_router.include_router(schedules.router)
api_router.include_router(servers.router)
api_router.include_router(status.router)

View File

@@ -0,0 +1,69 @@
from typing import List
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from croniter import croniter
from app.core.database import get_db
from app.models.schedule import Schedule
from app.schemas.schedule import ScheduleResponse, ScheduleUpdate, ScheduleLogResponse
router = APIRouter(prefix="/schedules", tags=["schedules"])
@router.get("", response_model=List[ScheduleResponse])
def list_schedules(db: Session = Depends(get_db)):
    """Return every schedule, ordered by id."""
    query = db.query(Schedule).order_by(Schedule.id)
    return query.all()
@router.get("/{schedule_id}", response_model=ScheduleResponse)
def get_schedule(schedule_id: int, db: Session = Depends(get_db)):
    """Fetch one schedule by id; 404 if it does not exist."""
    s = db.get(Schedule, schedule_id)
    if not s:
        raise HTTPException(status_code=404, detail="Schedule not found")
    return s
@router.put("/{schedule_id}", response_model=ScheduleResponse)
def update_schedule_cron(schedule_id: int, payload: ScheduleUpdate, db: Session = Depends(get_db)):
    """Update a schedule's cron expression.

    Validates the expression with croniter and recomputes next_run_at from
    now. Raises 404 for an unknown schedule, 422 for an invalid expression.
    """
    s = db.get(Schedule, schedule_id)
    if not s:
        raise HTTPException(status_code=404, detail="Schedule not found")
    # Validate cron expression before touching the row.
    try:
        cron = croniter(payload.cron_timer, datetime.utcnow())
        next_run = cron.get_next(datetime)
    except Exception as exc:
        # Chain the cause so logs keep the croniter error; clients see 422.
        raise HTTPException(status_code=422, detail="Invalid cron expression") from exc
    s.cron_timer = payload.cron_timer
    s.next_run_at = next_run
    db.commit()
    db.refresh(s)
    return s
@router.post("/{schedule_id}/run", status_code=202)
def manual_run(schedule_id: int, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
    """Manually trigger a schedule (runs asynchronously in the background).

    NOTE(review): the status check and the dispatch are not atomic here;
    presumably the atomic status UPDATE inside the runner is the real
    duplicate-run guard — confirm in dispatch_schedule.
    """
    s = db.get(Schedule, schedule_id)
    if not s:
        raise HTTPException(status_code=404, detail="Schedule not found")
    if s.status == "Going":
        raise HTTPException(status_code=409, detail="Schedule is already running")
    # Imported here to avoid a circular import with the scheduler package.
    from app.services.scheduler.runner import dispatch_schedule
    background_tasks.add_task(dispatch_schedule, schedule_id)
    return {"message": f"Schedule '{s.name}' triggered", "schedule_id": schedule_id}
@router.get("/{schedule_id}/logs", response_model=List[ScheduleLogResponse])
def get_schedule_logs(schedule_id: int, limit: int = 20, db: Session = Depends(get_db)):
    """Most recent run logs for a schedule, newest first (default 20 rows).

    An unknown schedule_id simply yields an empty list (no 404 here).
    """
    from app.models.schedule import ScheduleLog
    logs = (
        db.query(ScheduleLog)
        .filter(ScheduleLog.schedule_id == schedule_id)
        .order_by(ScheduleLog.started_at.desc())
        .limit(limit)
        .all()
    )
    return logs

View File

@@ -0,0 +1,104 @@
from typing import List, Optional
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import func, case
from sqlalchemy.orm import Session
from app.core.database import get_db
from app.models.server import Server, ServerStatusLog
from app.schemas.server import ServerCreate, ServerUpdate, ServerResponse, ServerAvailability
router = APIRouter(prefix="/servers", tags=["servers"])
def _calc_availability(db: Session, server_id: int, days: int) -> Optional[float]:
    """Percent of successful checks over the last `days` days (None if no data)."""
    cutoff = datetime.utcnow() - timedelta(days=days)
    ok_expr = func.sum(case((ServerStatusLog.result == True, 1), else_=0))  # noqa: E712
    stats = (
        db.query(func.count().label("total"), ok_expr.label("ok"))
        .filter(
            ServerStatusLog.server_id == server_id,
            ServerStatusLog.recorded_at >= cutoff,
        )
        .first()
    )
    if stats is None or not stats.total:
        # No status rows in the window: availability is undefined, not 0%.
        return None
    return round(stats.ok * 100.0 / stats.total, 2)
def _get_last_status(db: Session, server_id: int):
    """Most recent ServerStatusLog row for a server (ties broken by id desc)."""
    query = db.query(ServerStatusLog).filter(ServerStatusLog.server_id == server_id)
    return query.order_by(
        ServerStatusLog.recorded_at.desc(), ServerStatusLog.id.desc()
    ).first()
@router.get("", response_model=List[ServerResponse])
def list_servers(db: Session = Depends(get_db)):
    """List servers ordered by sort_order, each enriched with the last ping
    result and 30/90/365-day availability percentages.

    NOTE(review): four extra queries per server (last status + three
    availability windows) — fine for a handful of servers.
    """
    servers = db.query(Server).order_by(Server.sort_order).all()
    result = []
    for s in servers:
        resp = ServerResponse.model_validate(s)
        last = _get_last_status(db, s.id)
        if last:
            resp.last_result = last.result
            resp.last_response_time = last.response_time
        resp.availability = ServerAvailability(
            availability_30d=_calc_availability(db, s.id, 30),
            availability_90d=_calc_availability(db, s.id, 90),
            availability_365d=_calc_availability(db, s.id, 365),
        )
        result.append(resp)
    return result
@router.post("", response_model=ServerResponse, status_code=201)
def create_server(payload: ServerCreate, db: Session = Depends(get_db)):
    """Register a monitored server; 409 if the IP address is already taken.

    NOTE(review): check-then-insert is not race-proof; the unique constraint
    on ip_address is the real guarantee (a concurrent insert would 500).
    """
    if db.query(Server).filter(Server.ip_address == payload.ip_address).first():
        raise HTTPException(status_code=409, detail="IP address already exists")
    server = Server(**payload.model_dump())
    db.add(server)
    db.commit()
    db.refresh(server)
    return ServerResponse.model_validate(server)
@router.get("/{server_id}", response_model=ServerResponse)
def get_server(server_id: int, db: Session = Depends(get_db)):
    """Fetch one server with last status and availability stats; 404 if missing."""
    server = db.get(Server, server_id)
    if not server:
        raise HTTPException(status_code=404, detail="Server not found")
    resp = ServerResponse.model_validate(server)
    last = _get_last_status(db, server_id)
    if last:
        resp.last_result = last.result
        resp.last_response_time = last.response_time
    resp.availability = ServerAvailability(
        availability_30d=_calc_availability(db, server_id, 30),
        availability_90d=_calc_availability(db, server_id, 90),
        availability_365d=_calc_availability(db, server_id, 365),
    )
    return resp
@router.put("/{server_id}", response_model=ServerResponse)
def update_server(server_id: int, payload: ServerUpdate, db: Session = Depends(get_db)):
    """Partial update of a server; only non-null payload fields are applied.

    NOTE(review): exclude_none means optional fields cannot be cleared to
    NULL via this endpoint — confirm whether that is intended.
    """
    server = db.get(Server, server_id)
    if not server:
        raise HTTPException(status_code=404, detail="Server not found")
    for field, value in payload.model_dump(exclude_none=True).items():
        setattr(server, field, value)
    db.commit()
    db.refresh(server)
    return ServerResponse.model_validate(server)
@router.delete("/{server_id}", status_code=204)
def delete_server(server_id: int, db: Session = Depends(get_db)):
    """Remove a server record; 404 when the id is unknown."""
    target = db.get(Server, server_id)
    if target is None:
        raise HTTPException(status_code=404, detail="Server not found")
    db.delete(target)
    db.commit()

View File

@@ -0,0 +1,44 @@
from typing import List
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from pydantic import BaseModel
from typing import Optional
from datetime import datetime
from app.core.database import get_db
from app.models.server import SystemStatusLog
router = APIRouter(tags=["status"])
class SystemStatusItem(BaseModel):
    """Read model for one SystemStatusLog row."""
    id: int
    environment: str  # "test" or "prod"
    service_name: str  # traefik / keycloak / mail / db
    service_desc: Optional[str]
    result: bool
    fail_reason: Optional[str]
    recorded_at: datetime
    class Config:
        from_attributes = True
@router.get("/system-status", response_model=List[SystemStatusItem])
def get_system_status(db: Session = Depends(get_db)):
    """Latest system-status snapshot (8 rows: test/prod x traefik/keycloak/mail/db).

    NOTE(review): schedule_id == 3 is hard-coded as the system-status
    schedule — assumed to match the seeded schedules; confirm against seed.py.
    """
    # Find the most recent run of the system-status schedule.
    from app.models.schedule import ScheduleLog
    latest_log = (
        db.query(ScheduleLog)
        .filter(ScheduleLog.schedule_id == 3)
        .order_by(ScheduleLog.started_at.desc())
        .first()
    )
    if not latest_log:
        return []
    # All status rows recorded under that run.
    return (
        db.query(SystemStatusLog)
        .filter(SystemStatusLog.schedule_log_id == latest_log.id)
        .all()
    )

View File

@@ -0,0 +1,94 @@
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.core.database import get_db
from app.models.tenant import Tenant
from app.models.result import TenantScheduleResult
from app.schemas.tenant import TenantCreate, TenantUpdate, TenantResponse, TenantStatusLight
router = APIRouter(prefix="/tenants", tags=["tenants"])
def _get_lights(db: Session, tenant_id: int) -> Optional[TenantStatusLight]:
    """Latest schedule-result lights for one tenant; None if never checked."""
    latest = (
        db.query(TenantScheduleResult)
        .filter(TenantScheduleResult.tenant_id == tenant_id)
        .order_by(TenantScheduleResult.recorded_at.desc())
        .first()
    )
    if latest is None:
        return None
    return TenantStatusLight(
        sso_result=latest.sso_result,
        mailbox_result=latest.mailbox_result,
        nc_result=latest.nc_result,
        office_result=latest.office_result,
        quota_usage=latest.quota_usage,
    )
@router.get("", response_model=List[TenantResponse])
def list_tenants(
    is_active: Optional[bool] = Query(None),
    db: Session = Depends(get_db),
):
    """List tenants, optionally filtered by active flag, with latest lights.

    NOTE(review): one lights query per tenant (N+1); fine for small counts.
    """
    q = db.query(Tenant)
    if is_active is not None:
        q = q.filter(Tenant.is_active == is_active)
    tenants = q.order_by(Tenant.id).all()
    result = []
    for t in tenants:
        resp = TenantResponse.model_validate(t)
        resp.lights = _get_lights(db, t.id)
        result.append(resp)
    return result
@router.post("", response_model=TenantResponse, status_code=201)
def create_tenant(payload: TenantCreate, db: Session = Depends(get_db)):
    """Create a tenant; 409 when code or domain is already taken.

    NOTE(review): check-then-insert is not race-proof; the unique constraints
    on code/domain are the real guarantee (a concurrent insert would 500).
    """
    if db.query(Tenant).filter(Tenant.code == payload.code).first():
        raise HTTPException(status_code=409, detail="Tenant code already exists")
    if db.query(Tenant).filter(Tenant.domain == payload.domain).first():
        raise HTTPException(status_code=409, detail="Domain already exists")
    tenant = Tenant(**payload.model_dump())
    db.add(tenant)
    db.commit()
    db.refresh(tenant)
    resp = TenantResponse.model_validate(tenant)
    # A brand-new tenant has no schedule results yet.
    resp.lights = None
    return resp
@router.get("/{tenant_id}", response_model=TenantResponse)
def get_tenant(tenant_id: int, db: Session = Depends(get_db)):
    """Fetch one tenant with its latest lights; 404 if missing."""
    tenant = db.get(Tenant, tenant_id)
    if not tenant:
        raise HTTPException(status_code=404, detail="Tenant not found")
    resp = TenantResponse.model_validate(tenant)
    resp.lights = _get_lights(db, tenant_id)
    return resp
@router.put("/{tenant_id}", response_model=TenantResponse)
def update_tenant(tenant_id: int, payload: TenantUpdate, db: Session = Depends(get_db)):
    """Partial update of a tenant; only non-null payload fields are applied.

    NOTE(review): exclude_none means optional fields cannot be cleared to
    NULL via this endpoint — confirm whether that is intended.
    """
    tenant = db.get(Tenant, tenant_id)
    if not tenant:
        raise HTTPException(status_code=404, detail="Tenant not found")
    for field, value in payload.model_dump(exclude_none=True).items():
        setattr(tenant, field, value)
    db.commit()
    db.refresh(tenant)
    resp = TenantResponse.model_validate(tenant)
    resp.lights = _get_lights(db, tenant_id)
    return resp
@router.delete("/{tenant_id}", status_code=204)
def delete_tenant(tenant_id: int, db: Session = Depends(get_db)):
    """Delete a tenant; 404 when the id is unknown.

    The ORM cascade on Tenant.accounts removes its accounts as well.
    """
    target = db.get(Tenant, tenant_id)
    if target is None:
        raise HTTPException(status_code=404, detail="Tenant not found")
    db.delete(target)
    db.commit()

View File

View File

@@ -0,0 +1,27 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings, overridable via environment variables / .env.

    NOTE(review): real credentials and internal IPs are hard-coded as
    defaults below (DATABASE_URL password, hosts). They should move to
    .env or a secret store; defaults left unchanged to avoid breaking
    existing deployments.
    """
    # Database
    DATABASE_URL: str = "postgresql+psycopg2://admin:DC1qaz2wsx@10.1.0.20:5433/virtual_mis"
    # Keycloak admin access (client-credentials grant)
    KEYCLOAK_URL: str = "https://auth.lab.taipei"
    KEYCLOAK_ADMIN_REALM: str = "vmis-admin"
    KEYCLOAK_ADMIN_CLIENT_ID: str = "vmis-backend"
    KEYCLOAK_ADMIN_CLIENT_SECRET: str = ""
    # Docker Mailserver admin API
    MAIL_ADMIN_API_URL: str = "http://10.1.0.254:8080"
    MAIL_ADMIN_API_KEY: str = ""
    MAIL_MX_HOST: str = "mail.ease.taipei"
    # Remote docker compose host (SSH)
    DOCKER_SSH_HOST: str = "10.1.0.254"
    DOCKER_SSH_USER: str = "porsche"
    TENANT_DEPLOY_BASE: str = "/home/porsche/tenants"
    # App
    APP_ENV: str = "development"
    APP_PORT: int = 10281
    class Config:
        env_file = ".env"
settings = Settings()

View File

@@ -0,0 +1,25 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, DeclarativeBase
from app.core.config import settings
# Engine with connection health checks; pool sized for the API workload.
engine = create_engine(
    settings.DATABASE_URL,
    pool_pre_ping=True,  # validate connections before use, drop stale ones
    pool_size=10,
    max_overflow=20,
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


class Base(DeclarativeBase):
    """Declarative base shared by all ORM models."""
    pass


def get_db():
    """FastAPI dependency: yield a per-request session, always closed after."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

45
backend/app/main.py Normal file
View File

@@ -0,0 +1,45 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.database import SessionLocal
from app.services.seed import seed_initial_data
from app.services.scheduler.watchdog import start_watchdog, stop_watchdog
from app.api.v1.router import api_router
@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifecycle: seed reference data and start the watchdog on startup;
    stop the watchdog on shutdown."""
    # Startup
    db = SessionLocal()
    try:
        seed_initial_data(db)
    finally:
        db.close()
    start_watchdog()
    yield
    # Shutdown
    stop_watchdog()


app = FastAPI(
    title="Virtual MIS API",
    version="2.0.0",
    description="SaaS 虛擬 MIS 平台管理後端",
    lifespan=lifespan,
)

# CORS for the local frontend dev servers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:10280", "http://localhost:10290"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(api_router, prefix="/api/v1")


@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok", "service": "vmis-backend"}

View File

@@ -0,0 +1,11 @@
# Aggregate model imports so Base.metadata (and Alembic) sees every table.
from app.models.tenant import Tenant
from app.models.account import Account
from app.models.schedule import Schedule, ScheduleLog
from app.models.result import TenantScheduleResult, AccountScheduleResult
from app.models.server import Server, ServerStatusLog, SystemStatusLog

__all__ = [
    "Tenant", "Account", "Schedule", "ScheduleLog",
    "TenantScheduleResult", "AccountScheduleResult",
    "Server", "ServerStatusLog", "SystemStatusLog",
]

View File

@@ -0,0 +1,28 @@
from datetime import datetime
from sqlalchemy import Boolean, Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from app.core.database import Base
class Account(Base):
    """Per-tenant user account provisioned across SSO / mail / Nextcloud."""
    __tablename__ = "accounts"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
    account_code = Column(String(20), unique=True, nullable=False, index=True)  # prefix + seq_no zero-padded to 4 digits
    sso_account = Column(String(100), nullable=False)
    sso_uuid = Column(String(100), unique=True)  # Keycloak UUID, written back after first creation
    notification_email = Column(String(200), nullable=False)
    email = Column(String(200))  # sso_account@tenant.domain
    legal_name = Column(String(200))
    english_name = Column(String(200))
    quota_limit = Column(Integer, nullable=False, default=20)  # GB
    is_active = Column(Boolean, nullable=False, default=True)
    default_password = Column(String(200))
    seq_no = Column(Integer, nullable=False)  # per-tenant running sequence number
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)
    tenant = relationship("Tenant", back_populates="accounts")
    schedule_results = relationship("AccountScheduleResult", back_populates="account")

View File

@@ -0,0 +1,60 @@
from datetime import datetime
from sqlalchemy import Boolean, Column, Integer, String, Text, DateTime, Float, ForeignKey
from sqlalchemy.orm import relationship
from app.core.database import Base
class TenantScheduleResult(Base):
    """Per-tenant outcome of one schedule run (one row per tenant per run)."""
    __tablename__ = "tenant_schedule_results"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    schedule_log_id = Column(Integer, ForeignKey("schedule_logs.id"), nullable=False, index=True)
    tenant_id = Column(Integer, ForeignKey("tenants.id"), nullable=False, index=True)
    traefik_status = Column(Boolean)
    traefik_done_at = Column(DateTime)
    sso_result = Column(Boolean)
    sso_done_at = Column(DateTime)
    mailbox_result = Column(Boolean)
    mailbox_done_at = Column(DateTime)
    nc_result = Column(Boolean)
    nc_done_at = Column(DateTime)
    office_result = Column(Boolean)
    office_done_at = Column(DateTime)
    fail_reason = Column(Text)
    quota_usage = Column(Float)  # GB
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    schedule_log = relationship("ScheduleLog", back_populates="tenant_results")
    tenant = relationship("Tenant", back_populates="schedule_results")


class AccountScheduleResult(Base):
    """Per-account outcome of one schedule run (one row per account per run)."""
    __tablename__ = "account_schedule_results"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    schedule_log_id = Column(Integer, ForeignKey("schedule_logs.id"), nullable=False, index=True)
    account_id = Column(Integer, ForeignKey("accounts.id"), nullable=False, index=True)
    sso_uuid = Column(String(100))
    sso_account = Column(String(100))
    sso_result = Column(Boolean)
    sso_done_at = Column(DateTime)
    mailbox_result = Column(Boolean)
    mailbox_done_at = Column(DateTime)
    nc_result = Column(Boolean)
    nc_done_at = Column(DateTime)
    fail_reason = Column(Text)
    quota_usage = Column(Float)  # GB
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    schedule_log = relationship("ScheduleLog", back_populates="account_results")
    account = relationship("Account", back_populates="schedule_results")

View File

@@ -0,0 +1,36 @@
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from app.core.database import Base
class Schedule(Base):
    """A recurring job definition picked up by the watchdog."""
    __tablename__ = "schedules"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    name = Column(String(100), unique=True, nullable=False)
    cron_timer = Column(String(50), nullable=False)  # 6-field cron: sec min hour day month weekday
    status = Column(String(10), nullable=False, default="Waiting")  # Waiting / Going
    last_run_at = Column(DateTime)
    next_run_at = Column(DateTime)
    last_status = Column(String(10))  # ok / error
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    logs = relationship("ScheduleLog", back_populates="schedule")


class ScheduleLog(Base):
    """One execution of a schedule; parent of all per-run result rows."""
    __tablename__ = "schedule_logs"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    schedule_id = Column(Integer, ForeignKey("schedules.id"), nullable=False, index=True)
    schedule_name = Column(String(100), nullable=False)  # denormalized for easy log reading
    started_at = Column(DateTime, nullable=False)
    ended_at = Column(DateTime)
    status = Column(String(10), nullable=False, default="running")  # running / ok / error
    schedule = relationship("Schedule", back_populates="logs")
    tenant_results = relationship("TenantScheduleResult", back_populates="schedule_log")
    account_results = relationship("AccountScheduleResult", back_populates="schedule_log")
    system_status_logs = relationship("SystemStatusLog", back_populates="schedule_log")
    server_status_logs = relationship("ServerStatusLog", back_populates="schedule_log")

View File

@@ -0,0 +1,48 @@
from datetime import datetime
from sqlalchemy import Boolean, Column, Integer, String, Text, DateTime, Float, ForeignKey
from sqlalchemy.orm import relationship
from app.core.database import Base
class Server(Base):
    """A host monitored by the ping/status schedule."""
    __tablename__ = "servers"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    name = Column(String(100), nullable=False)
    ip_address = Column(String(50), unique=True, nullable=False)
    description = Column(String(200))
    sort_order = Column(Integer, nullable=False, default=0)  # display ordering
    is_active = Column(Boolean, nullable=False, default=True)
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    status_logs = relationship("ServerStatusLog", back_populates="server")


class ServerStatusLog(Base):
    """One reachability check of a server within a schedule run."""
    __tablename__ = "server_status_logs"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    schedule_log_id = Column(Integer, ForeignKey("schedule_logs.id"), nullable=False, index=True)
    server_id = Column(Integer, ForeignKey("servers.id"), nullable=False, index=True)
    result = Column(Boolean, nullable=False)
    response_time = Column(Float)  # ms
    fail_reason = Column(Text)
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    schedule_log = relationship("ScheduleLog", back_populates="server_status_logs")
    server = relationship("Server", back_populates="status_logs")


class SystemStatusLog(Base):
    """One functional service check (per environment) within a schedule run."""
    __tablename__ = "system_status_logs"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    schedule_log_id = Column(Integer, ForeignKey("schedule_logs.id"), nullable=False, index=True)
    environment = Column(String(10), nullable=False)  # test / prod
    service_name = Column(String(50), nullable=False)  # traefik / keycloak / mail / db
    service_desc = Column(String(100))
    result = Column(Boolean, nullable=False)
    fail_reason = Column(Text)
    recorded_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    schedule_log = relationship("ScheduleLog", back_populates="system_status_logs")

View File

@@ -0,0 +1,37 @@
from datetime import datetime
from sqlalchemy import Boolean, Column, Integer, String, Text, DateTime, Date
from sqlalchemy.orm import relationship
from app.core.database import Base
class Tenant(Base):
    """A customer organization; owns accounts and per-tenant service checks."""
    __tablename__ = "tenants"
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    code = Column(String(50), unique=True, nullable=False, index=True)
    prefix = Column(String(20), nullable=False, default="")  # account_code prefix
    name = Column(String(200), nullable=False)
    name_eng = Column(String(200))
    tax_id = Column(String(20))
    domain = Column(String(200), unique=True, nullable=False, index=True)
    address = Column(String(500))
    tel = Column(String(50))
    contact = Column(String(100))
    contact_mobile = Column(String(50))
    contact_email = Column(String(200))
    keycloak_realm = Column(String(100))
    plan_code = Column(String(50))
    employee_limit = Column(Integer)
    trial_start_date = Column(Date)
    trial_end_date = Column(Date)
    quota_per_user = Column(Integer, nullable=False, default=20)  # GB
    total_quota = Column(Integer, nullable=False, default=200)  # GB
    is_manager = Column(Boolean, nullable=False, default=False)
    is_active = Column(Boolean, nullable=False, default=True)
    status = Column(String(20), nullable=False, default="trial")  # trial / active / inactive
    note = Column(Text)
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)
    # Deleting a tenant removes its accounts via ORM cascade.
    accounts = relationship("Account", back_populates="tenant", cascade="all, delete-orphan")
    schedule_results = relationship("TenantScheduleResult", back_populates="tenant")

View File

View File

@@ -0,0 +1,50 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class AccountBase(BaseModel):
    """Fields a client supplies for an account."""
    tenant_id: int
    sso_account: str
    notification_email: str
    legal_name: Optional[str] = None
    english_name: Optional[str] = None
    quota_limit: int = 20  # GB
    is_active: bool = True
    default_password: Optional[str] = None


class AccountCreate(AccountBase):
    """Payload for POST /accounts (all fields from AccountBase)."""
    pass


class AccountUpdate(BaseModel):
    """Partial-update payload for PUT /accounts/{id}; None means 'unchanged'."""
    sso_account: Optional[str] = None
    notification_email: Optional[str] = None
    legal_name: Optional[str] = None
    english_name: Optional[str] = None
    quota_limit: Optional[int] = None
    is_active: Optional[bool] = None
    default_password: Optional[str] = None


class AccountStatusLight(BaseModel):
    """Latest per-service result lights for an account."""
    sso_result: Optional[bool] = None
    mailbox_result: Optional[bool] = None
    nc_result: Optional[bool] = None
    quota_usage: Optional[float] = None  # GB


class AccountResponse(AccountBase):
    """Account as returned by the API, with server-derived fields."""
    id: int
    account_code: str
    email: Optional[str] = None
    sso_uuid: Optional[str] = None
    seq_no: int
    created_at: datetime
    updated_at: datetime
    tenant_name: Optional[str] = None  # filled in by the router
    lights: Optional[AccountStatusLight] = None  # filled in by the router
    class Config:
        from_attributes = True

View File

@@ -0,0 +1,33 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class ScheduleResponse(BaseModel):
    """Schedule as returned by the API."""
    id: int
    name: str
    cron_timer: str
    status: str  # Waiting / Going
    last_run_at: Optional[datetime] = None
    next_run_at: Optional[datetime] = None
    last_status: Optional[str] = None  # ok / error
    recorded_at: datetime
    class Config:
        from_attributes = True


class ScheduleUpdate(BaseModel):
    """Payload for PUT /schedules/{id}: only the cron expression is editable."""
    cron_timer: str


class ScheduleLogResponse(BaseModel):
    """One schedule run as returned by GET /schedules/{id}/logs."""
    id: int
    schedule_id: int
    schedule_name: str
    started_at: datetime
    ended_at: Optional[datetime] = None
    status: str  # running / ok / error
    class Config:
        from_attributes = True

View File

@@ -0,0 +1,40 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class ServerBase(BaseModel):
    """Fields a client supplies for a monitored server."""
    name: str
    ip_address: str
    description: Optional[str] = None
    sort_order: int = 0
    is_active: bool = True


class ServerCreate(ServerBase):
    """Payload for POST /servers (all fields from ServerBase)."""
    pass


class ServerUpdate(BaseModel):
    """Partial-update payload for PUT /servers/{id}; None means 'unchanged'."""
    name: Optional[str] = None
    ip_address: Optional[str] = None
    description: Optional[str] = None
    sort_order: Optional[int] = None
    is_active: Optional[bool] = None


class ServerAvailability(BaseModel):
    """Availability percentages over rolling windows; None when no data."""
    availability_30d: Optional[float] = None
    availability_90d: Optional[float] = None
    availability_365d: Optional[float] = None


class ServerResponse(ServerBase):
    """Server as returned by the API, with derived status fields."""
    id: int
    recorded_at: datetime
    last_result: Optional[bool] = None  # filled in by the router
    last_response_time: Optional[float] = None  # ms
    availability: Optional[ServerAvailability] = None
    class Config:
        from_attributes = True

View File

@@ -0,0 +1,71 @@
from datetime import date, datetime
from typing import Optional
from pydantic import BaseModel, EmailStr
class TenantBase(BaseModel):
    """Fields a client supplies for a tenant."""
    code: str
    prefix: str = ""  # account_code prefix
    name: str
    name_eng: Optional[str] = None
    tax_id: Optional[str] = None
    domain: str
    address: Optional[str] = None
    tel: Optional[str] = None
    contact: Optional[str] = None
    contact_mobile: Optional[str] = None
    contact_email: Optional[str] = None
    keycloak_realm: Optional[str] = None
    plan_code: Optional[str] = None
    employee_limit: Optional[int] = None
    trial_start_date: Optional[date] = None
    trial_end_date: Optional[date] = None
    quota_per_user: int = 20  # GB
    total_quota: int = 200  # GB
    is_manager: bool = False
    is_active: bool = True
    status: str = "trial"  # trial / active / inactive
    note: Optional[str] = None


class TenantCreate(TenantBase):
    """Payload for POST /tenants (all fields from TenantBase)."""
    pass


class TenantUpdate(BaseModel):
    """Partial-update payload for PUT /tenants/{id}; None means 'unchanged'.

    Note: code, prefix and keycloak_realm are intentionally not updatable.
    """
    name: Optional[str] = None
    name_eng: Optional[str] = None
    tax_id: Optional[str] = None
    domain: Optional[str] = None
    address: Optional[str] = None
    tel: Optional[str] = None
    contact: Optional[str] = None
    contact_mobile: Optional[str] = None
    contact_email: Optional[str] = None
    plan_code: Optional[str] = None
    employee_limit: Optional[int] = None
    trial_start_date: Optional[date] = None
    trial_end_date: Optional[date] = None
    quota_per_user: Optional[int] = None
    total_quota: Optional[int] = None
    is_active: Optional[bool] = None
    status: Optional[str] = None
    note: Optional[str] = None


class TenantStatusLight(BaseModel):
    """Latest per-service result lights for a tenant."""
    sso_result: Optional[bool] = None
    mailbox_result: Optional[bool] = None
    nc_result: Optional[bool] = None
    office_result: Optional[bool] = None
    quota_usage: Optional[float] = None  # GB


class TenantResponse(TenantBase):
    """Tenant as returned by the API."""
    id: int
    created_at: datetime
    updated_at: datetime
    lights: Optional[TenantStatusLight] = None  # filled in by the router
    class Config:
        from_attributes = True

View File

View File

@@ -0,0 +1,86 @@
"""
DockerClient — docker-py (本機 Docker socket) + paramiko SSH (遠端 docker compose)
管理租戶的 NC / OO 容器。
"""
import logging
from typing import Optional
import httpx
from app.core.config import settings
logger = logging.getLogger(__name__)
class DockerClient:
    """Manage tenant NC/OO containers.

    Local containers are handled through docker-py (Docker socket); a missing
    stack is (re)deployed on the remote host via paramiko SSH + docker compose.
    Also provides a functional Traefik-route check over the Traefik API.
    """

    def __init__(self):
        # docker-py client is created lazily on first use (see _get_docker).
        self._docker = None

    def _get_docker(self):
        """Return (and cache) a docker-py client bound to the local socket."""
        if self._docker is None:
            import docker
            self._docker = docker.from_env()
        return self._docker

    def check_traefik_route(self, domain: str) -> bool:
        """Functional Traefik check for `domain`.

        Passes only when the HTTP router total is > 0 AND some router rule
        mentions the domain. Any HTTP/network error yields False.
        """
        try:
            resp = httpx.get("http://localhost:8080/api/overview", timeout=5.0)
            if resp.status_code != 200:
                return False
            data = resp.json()
            # Verify actual routes exist (functional check)
            http_count = data.get("http", {}).get("routers", {}).get("total", 0)
            if http_count == 0:
                return False
            # Check domain-specific router
            routers_resp = httpx.get("http://localhost:8080/api/http/routers", timeout=5.0)
            if routers_resp.status_code != 200:
                return False
            routers = routers_resp.json()
            return any(domain in str(r.get("rule", "")) for r in routers)
        except Exception as e:
            logger.warning(f"Traefik check failed for {domain}: {e}")
            return False

    def ensure_container_running(self, container_name: str, tenant_code: str, realm: str) -> bool:
        """Ensure the named container is running.

        running -> True; exited -> start it; unknown container -> deploy the
        tenant stack over SSH. Returns False for any other state or failure.
        """
        import docker  # for docker.errors; lazy like _get_docker

        try:
            container = self._get_docker().containers.get(container_name)
        except docker.errors.NotFound:
            # Container was never deployed: bring the compose stack up remotely.
            # (Previously detected by string-matching the exception text, which
            # was fragile; the typed exception is authoritative.)
            return self._ssh_compose_up(tenant_code, realm)
        except Exception as e:
            logger.error(f"Docker check failed for {container_name}: {e}")
            return False
        try:
            if container.status == "running":
                return True
            if container.status == "exited":
                container.start()
                container.reload()
                return container.status == "running"
        except Exception as e:
            logger.error(f"Docker check failed for {container_name}: {e}")
            return False
        # Any other state (created / restarting / paused / dead) is not healthy.
        return False

    def _ssh_compose_up(self, tenant_code: str, realm: str) -> bool:
        """SSH to the compose host and run `docker compose up -d` for the tenant.

        `realm` is accepted for interface compatibility but not used here.
        """
        try:
            import paramiko
            client = paramiko.SSHClient()
            # NOTE(review): AutoAddPolicy trusts unknown host keys (MITM risk);
            # consider loading a known_hosts file in production.
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            client.connect(
                settings.DOCKER_SSH_HOST,
                username=settings.DOCKER_SSH_USER,
                timeout=15,
            )
            try:
                deploy_dir = f"{settings.TENANT_DEPLOY_BASE}/{tenant_code}"
                _, stdout, _ = client.exec_command(
                    f"cd {deploy_dir} && docker compose up -d 2>&1"
                )
                return stdout.channel.recv_exit_status() == 0
            finally:
                # Close the connection even if exec/wait raises (was leaked before).
                client.close()
        except Exception as e:
            logger.error(f"SSH compose up failed for {tenant_code}: {e}")
            return False

View File

@@ -0,0 +1,95 @@
"""
KeycloakClient — 直接呼叫 Keycloak REST API不使用 python-keycloak 套件。
管理租戶 realm 及帳號的建立/查詢。
"""
import logging
from typing import Optional
import httpx
from app.core.config import settings
logger = logging.getLogger(__name__)
TIMEOUT = 10.0
class KeycloakClient:
    """Thin wrapper over the Keycloak Admin REST API (no python-keycloak).

    Admin calls authenticate with a client_credentials token from the
    vmis-admin realm. Fix: the token is cached and transparently refreshed
    once when a request returns 401 — the original cached it forever, so
    every call failed after the short-lived access token expired.
    """

    def __init__(self):
        self._base = settings.KEYCLOAK_URL.rstrip("/")
        self._admin_token: Optional[str] = None

    def _get_admin_token(self) -> str:
        """Fetch a fresh admin access token (client_credentials grant)."""
        url = f"{self._base}/realms/{settings.KEYCLOAK_ADMIN_REALM}/protocol/openid-connect/token"
        resp = httpx.post(
            url,
            data={
                "grant_type": "client_credentials",
                "client_id": settings.KEYCLOAK_ADMIN_CLIENT_ID,
                "client_secret": settings.KEYCLOAK_ADMIN_CLIENT_SECRET,
            },
            timeout=TIMEOUT,
        )
        resp.raise_for_status()
        return resp.json()["access_token"]

    def _headers(self, force_refresh: bool = False) -> dict:
        """Authorization header with the cached admin token.

        force_refresh=True discards the cached token first (used after 401).
        """
        if force_refresh or not self._admin_token:
            self._admin_token = self._get_admin_token()
        return {"Authorization": f"Bearer {self._admin_token}"}

    def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """Send an admin API request, retrying exactly once on 401 with a
        freshly obtained token (handles access-token expiry)."""
        resp = httpx.request(method, url, headers=self._headers(), timeout=TIMEOUT, **kwargs)
        if resp.status_code == 401:
            resp = httpx.request(
                method, url, headers=self._headers(force_refresh=True), timeout=TIMEOUT, **kwargs
            )
        return resp

    def _admin_url(self, path: str) -> str:
        return f"{self._base}/admin/realms/{path}"

    def realm_exists(self, realm: str) -> bool:
        """True when GET /admin/realms/{realm} answers 200; False on any
        error (including network failures)."""
        try:
            return self._request("GET", self._admin_url(realm)).status_code == 200
        except Exception:
            return False

    def create_realm(self, realm: str, display_name: str) -> bool:
        """Create an enabled realm; True on 201/204."""
        payload = {
            "realm": realm,
            "displayName": display_name,
            "enabled": True,
            "loginTheme": "keycloak",
        }
        resp = self._request("POST", f"{self._base}/admin/realms", json=payload)
        return resp.status_code in (201, 204)

    def get_user_uuid(self, realm: str, username: str) -> Optional[str]:
        """Exact-username lookup; returns the user's id or None.

        Raises httpx.HTTPStatusError on non-2xx responses.
        """
        resp = self._request(
            "GET",
            self._admin_url(f"{realm}/users"),
            params={"username": username, "exact": "true"},
        )
        resp.raise_for_status()
        users = resp.json()
        return users[0]["id"] if users else None

    def create_user(self, realm: str, username: str, email: str, password: Optional[str]) -> Optional[str]:
        """Create an enabled, email-verified user with an optional temporary
        password. Returns the new user's UUID (parsed from the Location
        header) or None on failure."""
        payload = {
            "username": username,
            "email": email,
            "enabled": True,
            "emailVerified": True,
        }
        if password:
            payload["credentials"] = [{"type": "password", "value": password, "temporary": True}]
        resp = self._request("POST", self._admin_url(f"{realm}/users"), json=payload)
        if resp.status_code == 201:
            location = resp.headers.get("Location", "")
            return location.rstrip("/").split("/")[-1]
        return None

View File

@@ -0,0 +1,81 @@
"""
MailClient — 呼叫 Docker Mailserver Admin API (http://10.1.0.254:8080)
管理 mail domain 和 mailbox 的建立/查詢。
建立 domain 前必須驗證 MX DNS 設定(對 active 租戶)。
"""
import logging
from typing import Optional
import httpx
import dns.resolver
from app.core.config import settings
logger = logging.getLogger(__name__)
TIMEOUT = 10.0
class MailClient:
    """Client for the Docker Mailserver admin API (X-API-Key auth).

    The scheduler calls check_mx_dns before provisioning a domain so mail
    domains are only created once DNS actually routes mail to our host.
    """

    def __init__(self):
        self._base = settings.MAIL_ADMIN_API_URL.rstrip("/")
        self._headers = {"X-API-Key": settings.MAIL_ADMIN_API_KEY}

    def check_mx_dns(self, domain: str) -> bool:
        """Return True when an MX record of *domain* targets our mail host.

        Fix: a record now matches only when its exchange equals
        settings.MAIL_MX_HOST or is a subdomain of it; the previous plain
        substring test also accepted unrelated hosts whose name merely
        contained ours. Returns False on resolver errors (NXDOMAIN,
        timeout, ...).
        """
        expected = settings.MAIL_MX_HOST.rstrip(".").lower()
        try:
            answers = dns.resolver.resolve(domain, "MX")
            for rdata in answers:
                exchange = str(rdata.exchange).rstrip(".").lower()
                if exchange == expected or exchange.endswith("." + expected):
                    return True
            return False
        except Exception as e:
            logger.warning(f"MX DNS check failed for {domain}: {e}")
            return False

    def domain_exists(self, domain: str) -> bool:
        """True when the admin API answers 200 for the domain; False on any
        error or non-200."""
        try:
            resp = httpx.get(
                f"{self._base}/api/v1/domains/{domain}",
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return resp.status_code == 200
        except Exception:
            return False

    def create_domain(self, domain: str) -> bool:
        """Create a mail domain; True on 200/201/204."""
        try:
            resp = httpx.post(
                f"{self._base}/api/v1/domains",
                json={"domain": domain},
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return resp.status_code in (200, 201, 204)
        except Exception as e:
            logger.error(f"create_domain({domain}) failed: {e}")
            return False

    def mailbox_exists(self, email: str) -> bool:
        """True when the admin API answers 200 for the mailbox."""
        try:
            resp = httpx.get(
                f"{self._base}/api/v1/mailboxes/{email}",
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return resp.status_code == 200
        except Exception:
            return False

    def create_mailbox(self, email: str, password: Optional[str], quota_gb: int = 20) -> bool:
        """Create a mailbox with the given quota (GB); True on 200/201/204.

        NOTE(review): a None password is sent as "" — confirm the mail API
        rejects or handles empty passwords as intended.
        """
        try:
            resp = httpx.post(
                f"{self._base}/api/v1/mailboxes",
                json={"email": email, "password": password or "", "quota": quota_gb},
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return resp.status_code in (200, 201, 204)
        except Exception as e:
            logger.error(f"create_mailbox({email}) failed: {e}")
            return False

View File

@@ -0,0 +1,85 @@
"""
NextcloudClient — Nextcloud OCS API
管理 NC 使用者的查詢/建立與 quota 統計。
"""
import logging
from typing import Optional
import httpx
logger = logging.getLogger(__name__)
OCS_HEADERS = {"OCS-APIRequest": "true"}
TIMEOUT = 15.0
class NextcloudClient:
    """Nextcloud OCS API client: user provisioning and quota statistics."""

    def __init__(self, domain: str, admin_user: str = "admin", admin_password: str = ""):
        self._base = f"https://{domain}"
        self._auth = (admin_user, admin_password)
        # Fix: OCS responds with XML unless JSON is explicitly requested, so
        # resp.json() below raised on every call and the quota helpers always
        # returned None. Requesting application/json makes them functional.
        self._headers = {**OCS_HEADERS, "Accept": "application/json"}

    def user_exists(self, username: str) -> bool:
        """True when GET cloud/users/{username} answers 200."""
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v1.php/cloud/users/{username}",
                auth=self._auth,
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return resp.status_code == 200
        except Exception:
            return False

    def create_user(self, username: str, password: Optional[str], quota_gb: int = 20) -> bool:
        """Create an NC user with the given quota; True on HTTP 200.

        NOTE(review): OCS reports some failures inside the body with HTTP
        200 — consider also checking ocs.meta.statuscode.
        """
        try:
            resp = httpx.post(
                f"{self._base}/ocs/v1.php/cloud/users",
                auth=self._auth,
                headers=self._headers,
                data={
                    "userid": username,
                    "password": password or "",
                    "quota": f"{quota_gb}GB",
                },
                timeout=TIMEOUT,
            )
            return resp.status_code == 200
        except Exception as e:
            logger.error(f"NC create_user({username}) failed: {e}")
            return False

    def get_user_quota_used_gb(self, username: str) -> Optional[float]:
        """Quota used by *username* in GB (4 decimals), or None on error."""
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v2.php/cloud/users/{username}",
                auth=self._auth,
                headers=self._headers,
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return None
            used_bytes = resp.json().get("ocs", {}).get("data", {}).get("quota", {}).get("used", 0)
            return round(used_bytes / 1073741824, 4)  # bytes -> GiB
        except Exception:
            return None

    def get_total_quota_used_gb(self) -> Optional[float]:
        """Sum quota usage across all users, or None on error.

        NOTE(review): only the first 500 users are fetched (no pagination) —
        confirm tenant user counts stay below that.
        """
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v2.php/cloud/users",
                auth=self._auth,
                headers=self._headers,
                params={"limit": 500},
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return None
            users = resp.json().get("ocs", {}).get("data", {}).get("users", [])
            total = 0.0
            for uid in users:
                used = self.get_user_quota_used_gb(uid)
                if used is not None:
                    total += used
            return round(total, 4)
        except Exception:
            return None

View File

@@ -0,0 +1,59 @@
"""
Schedule dispatcher: routes schedule_id to the correct run function.
Also used by manual trigger API.
"""
import logging
from sqlalchemy.orm import Session
from app.core.database import SessionLocal
logger = logging.getLogger(__name__)
def dispatch_schedule(schedule_id: int, log_id: int = None, db: Session = None):
    """Route *schedule_id* to its run function.

    Called two ways:
      * from the watchdog, which supplies its own ``db`` session and an
        already-created ``log_id`` (and finalizes the log itself);
      * from the manual-trigger API with neither, in which case this
        function opens a session and creates the ScheduleLog. Fix: that
        self-created log is now also finalized (ended_at / status) on both
        success and failure, so a manual run can no longer leave a log
        stuck in status="running" forever.

    Re-raises any exception from the underlying run function.
    """
    own_db = db is None
    if own_db:
        db = SessionLocal()
    own_log = log_id is None
    log = None
    if own_log:
        from datetime import datetime
        from app.models.schedule import ScheduleLog, Schedule
        schedule = db.get(Schedule, schedule_id)
        if not schedule:
            # Unknown schedule: nothing to run or log.
            if own_db:
                db.close()
            return
        log = ScheduleLog(
            schedule_id=schedule_id,
            schedule_name=schedule.name,
            started_at=datetime.utcnow(),
            status="running",
        )
        db.add(log)
        db.commit()
        db.refresh(log)
        log_id = log.id
    final_status = "ok"
    try:
        if schedule_id == 1:
            from app.services.scheduler.schedule_tenant import run_tenant_check
            run_tenant_check(log_id, db)
        elif schedule_id == 2:
            from app.services.scheduler.schedule_account import run_account_check
            run_account_check(log_id, db)
        elif schedule_id == 3:
            from app.services.scheduler.schedule_system import run_system_status
            run_system_status(log_id, db)
        else:
            logger.warning(f"Unknown schedule_id: {schedule_id}")
    except Exception as e:
        final_status = "error"
        logger.exception(f"dispatch_schedule({schedule_id}) error: {e}")
        raise
    finally:
        if own_log and log is not None:
            # Finalize only the log we created; the watchdog owns its own.
            from datetime import datetime
            try:
                if final_status == "error":
                    # Clear any failed transaction state before writing.
                    db.rollback()
                log.ended_at = datetime.utcnow()
                log.status = final_status
                db.commit()
            except Exception:
                # Never mask the original error with a bookkeeping failure.
                logger.exception(f"failed to finalize schedule log {log_id}")
        if own_db:
            db.close()

View File

@@ -0,0 +1,103 @@
"""
Schedule 2 — 帳號檢查(每 3 分鐘)
檢查每個 active 帳號的: SSO使用者 / Mailbox / NC使用者 / Quota
"""
import logging
from datetime import datetime
from sqlalchemy.orm import Session
from app.models.account import Account
from app.models.result import AccountScheduleResult
logger = logging.getLogger(__name__)
def run_account_check(schedule_log_id: int, db: Session):
    """Schedule 2 — per-account checks (every 3 minutes).

    For every active account: ensure the SSO user, mailbox, and Nextcloud
    user exist (creating whatever is missing) and record quota usage. One
    AccountScheduleResult row per account; everything is committed in a
    single transaction at the end. A failure in one step is appended to
    fail_reason and does not stop the remaining steps.

    Fix: removed a redundant ``db.flush()`` that followed ``db.commit()`` —
    it was a no-op on the already-committed session and only misled readers
    about the transaction boundary.
    """
    from app.services.keycloak_client import KeycloakClient
    from app.services.mail_client import MailClient
    from app.services.nextcloud_client import NextcloudClient

    accounts = (
        db.query(Account)
        .filter(Account.is_active == True)
        .all()
    )
    kc = KeycloakClient()
    mail = MailClient()
    for account in accounts:
        tenant = account.tenant
        realm = tenant.keycloak_realm or tenant.code
        result = AccountScheduleResult(
            schedule_log_id=schedule_log_id,
            account_id=account.id,
            sso_account=account.sso_account,
            recorded_at=datetime.utcnow(),
        )
        fail_reasons = []
        # [1] SSO user: exact-username lookup; create when missing and
        # back-fill the account's sso_uuid the first time we learn it.
        try:
            sso_uuid = kc.get_user_uuid(realm, account.sso_account)
            if sso_uuid:
                result.sso_result = True
                result.sso_uuid = sso_uuid
                if not account.sso_uuid:
                    account.sso_uuid = sso_uuid
            else:
                sso_uuid = kc.create_user(realm, account.sso_account, account.email, account.default_password)
                result.sso_result = sso_uuid is not None
                result.sso_uuid = sso_uuid
                if sso_uuid and not account.sso_uuid:
                    account.sso_uuid = sso_uuid
            result.sso_done_at = datetime.utcnow()
        except Exception as e:
            result.sso_result = False
            result.sso_done_at = datetime.utcnow()
            fail_reasons.append(f"sso: {e}")
        # [2] Mailbox: fall back to <sso_account>@<tenant domain> when the
        # account has no explicit email.
        try:
            email = account.email or f"{account.sso_account}@{tenant.domain}"
            mb_exists = mail.mailbox_exists(email)
            if mb_exists:
                result.mailbox_result = True
            else:
                created = mail.create_mailbox(email, account.default_password, account.quota_limit)
                result.mailbox_result = created
            result.mailbox_done_at = datetime.utcnow()
        except Exception as e:
            result.mailbox_result = False
            result.mailbox_done_at = datetime.utcnow()
            fail_reasons.append(f"mailbox: {e}")
        # [3] Nextcloud user on the tenant's NC instance.
        try:
            nc = NextcloudClient(tenant.domain)
            nc_exists = nc.user_exists(account.sso_account)
            if nc_exists:
                result.nc_result = True
            else:
                created = nc.create_user(account.sso_account, account.default_password, account.quota_limit)
                result.nc_result = created
            result.nc_done_at = datetime.utcnow()
        except Exception as e:
            result.nc_result = False
            result.nc_done_at = datetime.utcnow()
            fail_reasons.append(f"nc: {e}")
        # [4] Quota: best effort — a failure only logs a warning and leaves
        # quota_usage as None.
        try:
            nc = NextcloudClient(tenant.domain)
            result.quota_usage = nc.get_user_quota_used_gb(account.sso_account)
        except Exception as e:
            logger.warning(f"Quota check failed for {account.account_code}: {e}")
        if fail_reasons:
            result.fail_reason = "; ".join(fail_reasons)
        db.add(result)
    db.commit()
    logger.info(f"Account check done: {len(accounts)} accounts processed")

View File

@@ -0,0 +1,94 @@
"""
Schedule 3 — 系統狀態(每日 08:00
Part A: 基礎設施服務功能驗證traefik/keycloak/mail/db
Part B: 伺服器 ping 檢查
"""
import logging
from datetime import datetime
from sqlalchemy.orm import Session
from app.models.server import SystemStatusLog, ServerStatusLog, Server
logger = logging.getLogger(__name__)
# Fixed 8 services: environment × service_name.
# Each entry carries the connection parameters its checker needs:
#   traefik -> host/port, keycloak -> url/realm, mail -> host/port (SMTP),
#   db -> db_host/db_port.
# service_desc is the human-readable label stored on each SystemStatusLog row.
SERVICES = [
    # --- test environment ---
    {"environment": "test", "service_name": "traefik",
     "service_desc": "測試環境反向代理", "host": "localhost", "port": 8080},
    {"environment": "test", "service_name": "keycloak",
     "service_desc": "測試環境 SSO",
     "url": "https://auth.lab.taipei", "realm": "master"},
    {"environment": "test", "service_name": "mail",
     "service_desc": "測試環境 Mail Server", "host": "localhost", "port": 587},
    {"environment": "test", "service_name": "db",
     "service_desc": "10.1.0.20:5433 PostgreSQL",
     "db_host": "10.1.0.20", "db_port": 5433},
    # --- prod environment ---
    {"environment": "prod", "service_name": "traefik",
     "service_desc": "正式環境反向代理", "host": "localhost", "port": 8080},
    {"environment": "prod", "service_name": "keycloak",
     "service_desc": "正式環境 SSO",
     "url": "https://auth.ease.taipei", "realm": "master"},
    {"environment": "prod", "service_name": "mail",
     "service_desc": "正式環境 Mail Server", "host": "10.1.0.254", "port": 587},
    {"environment": "prod", "service_name": "db",
     "service_desc": "10.1.0.254:5432 PostgreSQL",
     "db_host": "10.1.0.254", "db_port": 5432},
]
def run_system_status(schedule_log_id: int, db: Session):
    """Schedule 3 — daily system-status sweep.

    Part A runs a functional check for each fixed infrastructure service
    (SERVICES above); Part B pings every active server. One log row is
    added per service / per server and everything is committed once at the
    end. Check failures are captured per row, never raised.
    """
    from app.services.system_checker import SystemChecker

    checker = SystemChecker()

    # Part A — infrastructure services.
    for svc in SERVICES:
        ok, reason = False, None
        try:
            name = svc["service_name"]
            if name == "traefik":
                ok = checker.check_traefik(svc["host"], svc["port"])
            elif name == "keycloak":
                ok = checker.check_keycloak(svc["url"], svc["realm"])
            elif name == "mail":
                ok = checker.check_smtp(svc["host"], svc["port"])
            elif name == "db":
                ok = checker.check_postgres(svc["db_host"], svc["db_port"])
        except Exception as exc:
            ok, reason = False, str(exc)
        db.add(SystemStatusLog(
            schedule_log_id=schedule_log_id,
            environment=svc["environment"],
            service_name=svc["service_name"],
            service_desc=svc["service_desc"],
            result=ok,
            fail_reason=reason,
            recorded_at=datetime.utcnow(),
        ))

    # Part B — ping each active server, in dashboard order.
    active_servers = (
        db.query(Server)
        .filter(Server.is_active == True)
        .order_by(Server.sort_order)
        .all()
    )
    for srv in active_servers:
        elapsed, reason = None, None
        try:
            elapsed = checker.ping_server(srv.ip_address)
            ok = elapsed is not None
            if not ok:
                reason = "No response"
        except Exception as exc:
            ok, reason = False, str(exc)
        db.add(ServerStatusLog(
            schedule_log_id=schedule_log_id,
            server_id=srv.id,
            result=ok,
            response_time=elapsed,
            fail_reason=reason,
            recorded_at=datetime.utcnow(),
        ))

    db.commit()
    logger.info(f"System status check done: {len(SERVICES)} services + {len(active_servers)} servers")

View File

@@ -0,0 +1,110 @@
"""
Schedule 1 — 租戶檢查(每 3 分鐘)
檢查每個 active 租戶的: Traefik路由 / SSO Realm / Mailbox Domain / NC容器 / OO容器 / Quota
"""
import logging
from datetime import datetime
from sqlalchemy.orm import Session
from app.models.tenant import Tenant
from app.models.result import TenantScheduleResult
logger = logging.getLogger(__name__)
def run_tenant_check(schedule_log_id: int, db: Session):
    """Schedule 1 — per-tenant checks (every 3 minutes).

    For each active tenant, in order: Traefik route, SSO realm (created if
    missing), mail domain (MX-gated for "active" tenants), NC container,
    OO container, and total quota usage. One TenantScheduleResult row per
    tenant, committed per tenant. Step failures are collected in
    fail_reason and do not abort the remaining steps — EXCEPT a failed MX
    check, which commits the partial result and skips the rest of that
    tenant's checks (containers/quota stay None for that run).
    """
    from app.services.keycloak_client import KeycloakClient
    from app.services.mail_client import MailClient
    from app.services.docker_client import DockerClient
    from app.services.nextcloud_client import NextcloudClient
    tenants = db.query(Tenant).filter(Tenant.is_active == True).all()
    kc = KeycloakClient()
    mail = MailClient()
    docker = DockerClient()
    for tenant in tenants:
        # Fall back to the tenant code when no explicit realm is configured.
        realm = tenant.keycloak_realm or tenant.code
        result = TenantScheduleResult(
            schedule_log_id=schedule_log_id,
            tenant_id=tenant.id,
            recorded_at=datetime.utcnow(),
        )
        fail_reasons = []
        # [1] Traefik: is the tenant's domain routed?
        try:
            result.traefik_status = docker.check_traefik_route(tenant.domain)
            result.traefik_done_at = datetime.utcnow()
        except Exception as e:
            result.traefik_status = False
            result.traefik_done_at = datetime.utcnow()
            fail_reasons.append(f"traefik: {e}")
        # [2] SSO: ensure the realm exists, creating it if needed.
        # NOTE(review): sso_result is set True even when create_realm
        # returns False — confirm whether creation failures should be
        # reflected here.
        try:
            exists = kc.realm_exists(realm)
            if not exists:
                kc.create_realm(realm, tenant.name)
            result.sso_result = True
            result.sso_done_at = datetime.utcnow()
        except Exception as e:
            result.sso_result = False
            result.sso_done_at = datetime.utcnow()
            fail_reasons.append(f"sso: {e}")
        # [3] Mail domain. For tenants in status "active" the MX record must
        # point at our mail host first; on MX failure the partial result is
        # committed immediately and the remaining checks are skipped.
        try:
            if tenant.status == "active":
                dns_ok = mail.check_mx_dns(tenant.domain)
                if not dns_ok:
                    result.mailbox_result = False
                    result.mailbox_done_at = datetime.utcnow()
                    fail_reasons.append("mailbox: MX record not configured")
                    db.add(result)
                    db.commit()
                    continue
            domain_exists = mail.domain_exists(tenant.domain)
            if not domain_exists:
                mail.create_domain(tenant.domain)
            result.mailbox_result = True
            result.mailbox_done_at = datetime.utcnow()
        except Exception as e:
            result.mailbox_result = False
            result.mailbox_done_at = datetime.utcnow()
            fail_reasons.append(f"mailbox: {e}")
        # [4] Nextcloud container (name convention: nc-<realm>).
        try:
            nc_name = f"nc-{realm}"
            result.nc_result = docker.ensure_container_running(nc_name, tenant.code, realm)
            result.nc_done_at = datetime.utcnow()
        except Exception as e:
            result.nc_result = False
            result.nc_done_at = datetime.utcnow()
            fail_reasons.append(f"nc: {e}")
        # [5] OnlyOffice container (name convention: oo-<realm>).
        try:
            oo_name = f"oo-{realm}"
            result.office_result = docker.ensure_container_running(oo_name, tenant.code, realm)
            result.office_done_at = datetime.utcnow()
        except Exception as e:
            result.office_result = False
            result.office_done_at = datetime.utcnow()
            fail_reasons.append(f"office: {e}")
        # [6] Quota: best effort — failure only logs a warning.
        try:
            nc = NextcloudClient(tenant.domain)
            result.quota_usage = nc.get_total_quota_used_gb()
        except Exception as e:
            logger.warning(f"Quota check failed for {tenant.code}: {e}")
        if fail_reasons:
            result.fail_reason = "; ".join(fail_reasons)
        db.add(result)
        db.commit()
    logger.info(f"Tenant check done: {len(tenants)} tenants processed")

View File

@@ -0,0 +1,107 @@
"""
Watchdog: APScheduler BackgroundScheduler每 3 分鐘掃描 schedules 表。
防重複執行:原子 UPDATE status='Going',影響 0 筆則跳過。
"""
import logging
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
from croniter import croniter
from sqlalchemy import update
from sqlalchemy.orm import Session
from app.core.database import SessionLocal
from app.models.schedule import Schedule, ScheduleLog
logger = logging.getLogger(__name__)
_scheduler = BackgroundScheduler(timezone="Asia/Taipei")
def _watchdog_tick():
    """One watchdog pass: claim and run every due schedule.

    Claiming is an atomic UPDATE ... WHERE status='Waiting', so with
    multiple workers only one wins ("Going" acts as the lock). After the
    run the schedule is reset to Waiting with a recomputed next_run_at.

    Fix: the seeded cron expressions are SIX-field with the seconds field
    FIRST ("0 */3 * * * *" = every 3 minutes at second 0 — see seed.py),
    but croniter's default 6-field layout puts seconds LAST, which misread
    them (e.g. as "minute 0 of every 3rd hour"); second_at_beginning=True
    is now passed, with a fallback for croniter versions lacking it.

    NOTE(review): next_run_at is computed from utcnow() while the scheduler
    runs in Asia/Taipei — confirm which timezone the daily 08:00 schedule
    is meant to fire in.
    """
    db: Session = SessionLocal()
    try:
        due = (
            db.query(Schedule)
            .filter(
                Schedule.status == "Waiting",
                Schedule.next_run_at <= datetime.utcnow(),
            )
            .all()
        )
        for schedule in due:
            # Atomic lock: only one process wins.
            affected = db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id, Schedule.status == "Waiting")
                .values(status="Going")
            ).rowcount
            db.commit()
            if affected == 0:
                # Another process already grabbed it.
                continue
            log = ScheduleLog(
                schedule_id=schedule.id,
                schedule_name=schedule.name,
                started_at=datetime.utcnow(),
                status="running",
            )
            db.add(log)
            db.commit()
            db.refresh(log)
            try:
                from app.services.scheduler.runner import dispatch_schedule
                dispatch_schedule(schedule.id, log.id, db)
                final_status = "ok"
            except Exception as e:
                logger.exception(f"Schedule {schedule.name} failed: {e}")
                final_status = "error"
            # Update log
            log.ended_at = datetime.utcnow()
            log.status = final_status
            # Recalculate next_run_at (seconds-first six-field cron).
            try:
                try:
                    cron = croniter(schedule.cron_timer, datetime.utcnow(), second_at_beginning=True)
                except TypeError:
                    # Older croniter without the keyword: fall back to the
                    # default (seconds-last) parsing rather than never
                    # rescheduling.
                    cron = croniter(schedule.cron_timer, datetime.utcnow())
                next_run = cron.get_next(datetime)
            except Exception:
                next_run = None
            # Reset schedule (release the lock).
            db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id)
                .values(
                    status="Waiting",
                    last_run_at=datetime.utcnow(),
                    next_run_at=next_run,
                    last_status=final_status,
                )
            )
            db.commit()
    except Exception as e:
        logger.exception(f"Watchdog tick error: {e}")
        db.rollback()
    finally:
        db.close()
def start_watchdog():
    """Register the 3-minute watchdog tick and start the scheduler.

    replace_existing=True makes repeated calls idempotent for the job id.
    """
    _scheduler.add_job(
        _watchdog_tick,
        "interval",
        minutes=3,
        id="watchdog",
        replace_existing=True,
    )
    _scheduler.start()
    logger.info("Watchdog scheduler started")
def stop_watchdog():
    """Shut the scheduler down without waiting for in-flight jobs."""
    _scheduler.shutdown(wait=False)
    logger.info("Watchdog scheduler stopped")

View File

@@ -0,0 +1,49 @@
"""Initial data seed: schedules + servers"""
from datetime import datetime
from croniter import croniter
from sqlalchemy.orm import Session
from app.models.schedule import Schedule
from app.models.server import Server
# Built-in schedules. cron_timer is a six-field cron documented as
# "sec min hour day month weekday" (seconds first) — see _calc_next_run.
INITIAL_SCHEDULES = [
    {"id": 1, "name": "租戶檢查", "cron_timer": "0 */3 * * * *"},
    {"id": 2, "name": "帳號檢查", "cron_timer": "0 */3 * * * *"},
    {"id": 3, "name": "系統狀態", "cron_timer": "0 0 8 * * *"},
]
# Built-in monitored servers; sort_order drives dashboard ordering.
INITIAL_SERVERS = [
    {"id": 1, "name": "home", "ip_address": "10.1.0.254", "sort_order": 1,
     "description": "核心服務主機 (Ubuntu 24.04 / Dell Inspiron 3910)"},
    {"id": 2, "name": "小的NAS", "ip_address": "10.1.0.20", "sort_order": 2,
     "description": "資料庫主機 (Synology DS716+II / DSM 6.2.4)"},
    {"id": 3, "name": "大的NAS", "ip_address": "10.1.0.30", "sort_order": 3,
     "description": "儲存主機 (Synology DS920+ / DSM 7.3.2)"},
    {"id": 4, "name": "Porsche_KLI", "ip_address": "10.1.0.245", "sort_order": 4,
     "description": "開發環境 (ASUS MINIPC PN62 / Windows 11)"},
]
def _calc_next_run(cron_timer: str) -> datetime:
    """Next fire time (UTC-naive) for a SIX-field cron with seconds FIRST
    (sec min hour day month weekday), e.g. "0 */3 * * * *" = every 3
    minutes at second 0.

    Fix: croniter's default 6-field layout puts seconds LAST, which misread
    these expressions (e.g. as "minute 0 of every 3rd hour"), so
    second_at_beginning=True is passed; croniter versions without that
    keyword fall back to the default parsing.
    """
    now = datetime.utcnow()
    try:
        cron = croniter(cron_timer, now, second_at_beginning=True)
    except TypeError:  # croniter too old for the keyword
        cron = croniter(cron_timer, now)
    return cron.get_next(datetime)
def seed_initial_data(db: Session) -> None:
    """Idempotently insert the built-in schedules and servers.

    Rows are keyed by their fixed primary keys; anything already present
    is left untouched. Commits once at the end.
    """
    for spec in INITIAL_SCHEDULES:
        if db.get(Schedule, spec["id"]) is None:
            db.add(
                Schedule(
                    id=spec["id"],
                    name=spec["name"],
                    cron_timer=spec["cron_timer"],
                    status="Waiting",
                    next_run_at=_calc_next_run(spec["cron_timer"]),
                )
            )
    for spec in INITIAL_SERVERS:
        if db.get(Server, spec["id"]) is None:
            db.add(Server(**spec))
    db.commit()

View File

@@ -0,0 +1,105 @@
"""
SystemChecker — 功能驗證(不只 handshake
traefik: routers > 0 / keycloak: token 取得 / mail: EHLO / db: SELECT 1 / server: ping
"""
import logging
import smtplib
from typing import Optional
import httpx
import psycopg2
from app.core.config import settings
logger = logging.getLogger(__name__)
class SystemChecker:
    """Functional health checks (not just TCP handshakes) for the
    infrastructure services, plus ICMP ping for servers.
    """

    def check_traefik(self, host: str = "localhost", port: int = 8080) -> bool:
        """Traefik API overview must answer 200 AND report > 0 HTTP routers."""
        try:
            resp = httpx.get(f"http://{host}:{port}/api/overview", timeout=5.0)
            if resp.status_code != 200:
                return False
            data = resp.json()
            total_routers = data.get("http", {}).get("routers", {}).get("total", 0)
            return total_routers > 0
        except Exception as e:
            logger.warning(f"Traefik check failed: {e}")
            return False

    def check_keycloak(self, base_url: str, realm: str = "master") -> bool:
        """Two-step functional check.

        Step 1: GET /realms/{realm} must answer 200.
        Step 2: a client_credentials token must actually be obtainable from
        the admin realm (proves the auth flow works, not just the HTTP tier).
        """
        try:
            resp = httpx.get(f"{base_url}/realms/{realm}", timeout=8.0)
            if resp.status_code != 200:
                return False
            token_resp = httpx.post(
                f"{base_url}/realms/{settings.KEYCLOAK_ADMIN_REALM}/protocol/openid-connect/token",
                data={
                    "grant_type": "client_credentials",
                    "client_id": settings.KEYCLOAK_ADMIN_CLIENT_ID,
                    "client_secret": settings.KEYCLOAK_ADMIN_CLIENT_SECRET,
                },
                timeout=8.0,
            )
            return token_resp.status_code == 200 and "access_token" in token_resp.json()
        except Exception as e:
            logger.warning(f"Keycloak check failed ({base_url}): {e}")
            return False

    def check_smtp(self, host: str, port: int = 587) -> bool:
        """SMTP connect + EHLO (functional protocol check)."""
        try:
            with smtplib.SMTP(host, port, timeout=8) as smtp:
                smtp.ehlo()
            return True
        except Exception as e:
            logger.warning(f"SMTP check failed ({host}:{port}): {e}")
            return False

    def check_postgres(self, host: str, port: int = 5432) -> bool:
        """Connect and run SELECT 1.

        Fix: credentials now come from the SYSTEM_CHECK_DB_USER /
        SYSTEM_CHECK_DB_PASSWORD environment variables instead of being
        hard-coded; the previous literal values remain as fallbacks for
        backward compatibility.
        TODO(security): remove the hard-coded fallback password once the
        environment variables are deployed everywhere.
        """
        import os
        try:
            conn = psycopg2.connect(
                host=host, port=port, dbname="postgres",
                user=os.getenv("SYSTEM_CHECK_DB_USER", "admin"),
                password=os.getenv("SYSTEM_CHECK_DB_PASSWORD", "DC1qaz2wsx"),
                connect_timeout=8,
            )
            try:
                with conn.cursor() as cur:
                    cur.execute("SELECT 1")
                    row = cur.fetchone()
                return row == (1,)
            finally:
                # psycopg2's `with conn:` only manages the transaction, not
                # the connection — close explicitly.
                conn.close()
        except Exception as e:
            logger.warning(f"PostgreSQL check failed ({host}:{port}): {e}")
            return False

    def ping_server(self, ip_address: str) -> Optional[float]:
        """ICMP ping; returns round-trip time in ms, or None when the host
        is unreachable or the check errors.

        ping3 needs raw-socket privileges; on PermissionError (non-root) we
        fall back to timing a TCP connect to port 22.
        """
        try:
            import ping3
            result = ping3.ping(ip_address, timeout=3)
            # ping3 yields None/False for timeouts and unresolvable hosts.
            if result is not None and result is not False:
                return round(result * 1000, 2)  # seconds -> ms
        except PermissionError:
            # Fallback: TCP connect to port 22.
            import socket
            import time
            try:
                start = time.time()
                sock = socket.create_connection((ip_address, 22), timeout=3)
                sock.close()
                return round((time.time() - start) * 1000, 2)
            except Exception:
                pass
        except Exception as e:
            logger.warning(f"Ping failed for {ip_address}: {e}")
        return None