feat(backend): Phase 1-4 全新開發完成,37/37 TDD 通過
[Phase 0 Reset]
- 清除舊版 app/、alembic/versions/、雜亂測試腳本
- 新 requirements.txt (移除 caldav/redis/keycloak-lib,加入 apscheduler/croniter/docker/paramiko/ping3/dnspython)
[Phase 1 資料庫]
- 9 張資料表 SQLAlchemy Models:tenants / accounts / schedules / schedule_logs /
tenant_schedule_results / account_schedule_results / servers / server_status_logs / system_status_logs
- Alembic migration 001_create_all_tables (已套用到 10.1.0.20:5433/virtual_mis)
- seed.py:schedules 初始 3 筆 / servers 初始 4 筆
[Phase 2 CRUD API]
- GET/POST/PUT/DELETE: /api/v1/tenants / accounts / servers / schedules
- /api/v1/system-status
- 帳號編碼自動產生 (prefix + seq_no 4碼左補0)
- 燈號 (lights) 從最新排程結果取得
[Phase 3 Watchdog]
- APScheduler interval 3分鐘,原子 UPDATE status=Going 防重複執行
- 手動觸發 API: POST /api/v1/schedules/{id}/run
[Phase 4 Service Clients]
- KeycloakClient:vmis-admin realm,REST API (不用 python-keycloak)
- MailClient:Docker Mailserver @ 10.1.0.254:8080,含 MX DNS 驗證
- DockerClient:docker-py 本機 + paramiko SSH 遠端 compose
- NextcloudClient:OCS API user/quota
- SystemChecker:功能驗證 (traefik routers>0 / keycloak token / SMTP EHLO / DB SELECT 1 / ping)
[TDD]
- 37 tests / 37 passed (2.11s)
- SQLite in-memory + StaticPool,無需外部 DB
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
backend/app/services/__init__.py
Normal file
0
backend/app/services/__init__.py
Normal file
86
backend/app/services/docker_client.py
Normal file
86
backend/app/services/docker_client.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
DockerClient — docker-py (本機 Docker socket) + paramiko SSH (遠端 docker compose)
|
||||
管理租戶的 NC / OO 容器。
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DockerClient:
    """Manage per-tenant NC / OO containers.

    Uses docker-py against the local Docker socket for status checks and
    starts, and falls back to `docker compose up -d` over paramiko SSH on
    the remote deploy host when a container does not exist at all.
    """

    def __init__(self):
        # docker-py client is created lazily so importing this module does
        # not require a reachable Docker daemon.
        self._docker = None

    def _get_docker(self):
        """Return a cached docker-py client, creating it on first use."""
        if self._docker is None:
            import docker
            self._docker = docker.from_env()
        return self._docker

    def check_traefik_route(self, domain: str) -> bool:
        """Return True when Traefik has at least one HTTP router and one of
        them matches *domain*.

        Functional check, not just a handshake:
        1. GET /api/overview      -> total HTTP router count must be > 0
        2. GET /api/http/routers  -> some router rule must mention *domain*
        """
        try:
            resp = httpx.get("http://localhost:8080/api/overview", timeout=5.0)
            if resp.status_code != 200:
                return False
            data = resp.json()
            # Verify actual routes exist (functional check)
            http_count = data.get("http", {}).get("routers", {}).get("total", 0)
            if http_count == 0:
                return False
            # Check domain-specific router
            routers_resp = httpx.get("http://localhost:8080/api/http/routers", timeout=5.0)
            if routers_resp.status_code != 200:
                return False
            routers = routers_resp.json()
            # Substring match against the router rule, e.g. Host(`domain`).
            return any(domain in str(r.get("rule", "")) for r in routers)
        except Exception as e:
            logger.warning(f"Traefik check failed for {domain}: {e}")
            return False

    def ensure_container_running(self, container_name: str, tenant_code: str, realm: str) -> bool:
        """Check container status; start if exited; deploy via SSH if not found.

        Returns True only when the container ends up in the "running" state;
        any other state (paused, created, restarting, ...) yields False.
        """
        try:
            docker_client = self._get_docker()
            container = docker_client.containers.get(container_name)
            if container.status == "running":
                return True
            elif container.status == "exited":
                container.start()
                container.reload()
                return container.status == "running"
        except Exception as e:
            # docker-py signals an unknown container with a 404 APIError;
            # detected by message so the import can stay local to _get_docker.
            if "Not Found" in str(e) or "404" in str(e):
                return self._ssh_compose_up(tenant_code, realm)
            logger.error(f"Docker check failed for {container_name}: {e}")
            return False
        return False

    def _ssh_compose_up(self, tenant_code: str, realm: str) -> bool:
        """SSH into the deploy host and run `docker compose up -d`.

        Returns True when the remote command exits with status 0.
        """
        try:
            import paramiko
            client = paramiko.SSHClient()
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            client.connect(
                settings.DOCKER_SSH_HOST,
                username=settings.DOCKER_SSH_USER,
                timeout=15,
            )
            try:
                deploy_dir = f"{settings.TENANT_DEPLOY_BASE}/{tenant_code}"
                stdin, stdout, stderr = client.exec_command(
                    f"cd {deploy_dir} && docker compose up -d 2>&1"
                )
                exit_status = stdout.channel.recv_exit_status()
            finally:
                # BUGFIX: always release the SSH connection, even when
                # exec_command or the exit-status wait raises; previously the
                # connection leaked on that path.
                client.close()
            return exit_status == 0
        except Exception as e:
            logger.error(f"SSH compose up failed for {tenant_code}: {e}")
            return False
|
||||
95
backend/app/services/keycloak_client.py
Normal file
95
backend/app/services/keycloak_client.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
KeycloakClient — 直接呼叫 Keycloak REST API,不使用 python-keycloak 套件。
|
||||
管理租戶 realm 及帳號的建立/查詢。
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TIMEOUT = 10.0
|
||||
|
||||
|
||||
class KeycloakClient:
    """Thin wrapper over the Keycloak admin REST API (no python-keycloak).

    Handles tenant realm and user creation/lookup.  An admin access token
    is fetched lazily via the client-credentials grant against the
    configured admin realm and cached for the instance's lifetime.
    """

    def __init__(self):
        self._base = settings.KEYCLOAK_URL.rstrip("/")
        self._admin_token: Optional[str] = None

    def _get_admin_token(self) -> str:
        """Fetch an admin access token from the configured admin realm."""
        token_url = f"{self._base}/realms/{settings.KEYCLOAK_ADMIN_REALM}/protocol/openid-connect/token"
        form = {
            "grant_type": "client_credentials",
            "client_id": settings.KEYCLOAK_ADMIN_CLIENT_ID,
            "client_secret": settings.KEYCLOAK_ADMIN_CLIENT_SECRET,
        }
        resp = httpx.post(token_url, data=form, timeout=TIMEOUT)
        resp.raise_for_status()
        return resp.json()["access_token"]

    def _headers(self) -> dict:
        """Return Authorization headers, obtaining the token on first use."""
        if not self._admin_token:
            self._admin_token = self._get_admin_token()
        return {"Authorization": f"Bearer {self._admin_token}"}

    def _admin_url(self, path: str) -> str:
        """Build an admin endpoint URL under /admin/realms/."""
        return f"{self._base}/admin/realms/{path}"

    def realm_exists(self, realm: str) -> bool:
        """True when GET /admin/realms/{realm} answers 200."""
        try:
            resp = httpx.get(self._admin_url(realm), headers=self._headers(), timeout=TIMEOUT)
            return resp.status_code == 200
        except Exception:
            return False

    def create_realm(self, realm: str, display_name: str) -> bool:
        """Create an enabled realm; True on 201/204."""
        body = {
            "realm": realm,
            "displayName": display_name,
            "enabled": True,
            "loginTheme": "keycloak",
        }
        resp = httpx.post(
            f"{self._base}/admin/realms",
            json=body,
            headers=self._headers(),
            timeout=TIMEOUT,
        )
        return resp.status_code in (201, 204)

    def get_user_uuid(self, realm: str, username: str) -> Optional[str]:
        """Exact-username lookup; returns the Keycloak user id or None."""
        resp = httpx.get(
            self._admin_url(f"{realm}/users"),
            params={"username": username, "exact": "true"},
            headers=self._headers(),
            timeout=TIMEOUT,
        )
        resp.raise_for_status()
        matches = resp.json()
        if not matches:
            return None
        return matches[0]["id"]

    def create_user(self, realm: str, username: str, email: str, password: Optional[str]) -> Optional[str]:
        """Create an enabled, email-verified user; returns its id or None.

        When *password* is given it is attached as a temporary credential.
        """
        body = {
            "username": username,
            "email": email,
            "enabled": True,
            "emailVerified": True,
        }
        if password:
            body["credentials"] = [{"type": "password", "value": password, "temporary": True}]
        resp = httpx.post(
            self._admin_url(f"{realm}/users"),
            json=body,
            headers=self._headers(),
            timeout=TIMEOUT,
        )
        if resp.status_code != 201:
            return None
        # Keycloak returns the new resource URL in Location; the final
        # path segment is the user id.
        location = resp.headers.get("Location", "")
        return location.rstrip("/").split("/")[-1]
|
||||
81
backend/app/services/mail_client.py
Normal file
81
backend/app/services/mail_client.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
MailClient — 呼叫 Docker Mailserver Admin API (http://10.1.0.254:8080)
|
||||
管理 mail domain 和 mailbox 的建立/查詢。
|
||||
建立 domain 前必須驗證 MX DNS 設定(對 active 租戶)。
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
import httpx
|
||||
import dns.resolver
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TIMEOUT = 10.0
|
||||
|
||||
|
||||
class MailClient:
    """Client for the Docker Mailserver admin API.

    Creates/queries mail domains and mailboxes.  Callers are expected to
    gate domain creation for active tenants on check_mx_dns().
    """

    def __init__(self):
        self._base = settings.MAIL_ADMIN_API_URL.rstrip("/")
        self._headers = {"X-API-Key": settings.MAIL_ADMIN_API_KEY}

    def check_mx_dns(self, domain: str) -> bool:
        """True when some MX record of *domain* points at our mail host."""
        try:
            records = dns.resolver.resolve(domain, "MX")
            return any(
                settings.MAIL_MX_HOST in str(record.exchange).rstrip(".")
                for record in records
            )
        except Exception as e:
            logger.warning(f"MX DNS check failed for {domain}: {e}")
            return False

    def domain_exists(self, domain: str) -> bool:
        """True when the admin API reports the mail domain exists."""
        try:
            response = httpx.get(
                f"{self._base}/api/v1/domains/{domain}",
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return response.status_code == 200
        except Exception:
            return False

    def create_domain(self, domain: str) -> bool:
        """Create a mail domain; True on any 2xx success code."""
        try:
            response = httpx.post(
                f"{self._base}/api/v1/domains",
                json={"domain": domain},
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return response.status_code in (200, 201, 204)
        except Exception as e:
            logger.error(f"create_domain({domain}) failed: {e}")
            return False

    def mailbox_exists(self, email: str) -> bool:
        """True when the admin API knows the mailbox."""
        try:
            response = httpx.get(
                f"{self._base}/api/v1/mailboxes/{email}",
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return response.status_code == 200
        except Exception:
            return False

    def create_mailbox(self, email: str, password: Optional[str], quota_gb: int = 20) -> bool:
        """Create a mailbox with the given quota; True on 2xx success."""
        payload = {"email": email, "password": password or "", "quota": quota_gb}
        try:
            response = httpx.post(
                f"{self._base}/api/v1/mailboxes",
                json=payload,
                headers=self._headers,
                timeout=TIMEOUT,
            )
            return response.status_code in (200, 201, 204)
        except Exception as e:
            logger.error(f"create_mailbox({email}) failed: {e}")
            return False
|
||||
85
backend/app/services/nextcloud_client.py
Normal file
85
backend/app/services/nextcloud_client.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
NextcloudClient — Nextcloud OCS API
|
||||
管理 NC 使用者的查詢/建立與 quota 統計。
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OCS_HEADERS = {"OCS-APIRequest": "true"}
|
||||
TIMEOUT = 15.0
|
||||
|
||||
|
||||
class NextcloudClient:
    """Nextcloud OCS API client for user lookup/creation and quota stats.

    NOTE: /ocs/v1.php endpoints answer HTTP 200 even on failure — the real
    status lives in ocs.meta.statuscode (100 == success), while /ocs/v2.php
    uses real HTTP status codes.  Every response that gets parsed requests
    JSON explicitly (?format=json), because OCS defaults to XML.
    """

    def __init__(self, domain: str, admin_user: str = "admin", admin_password: str = ""):
        self._base = f"https://{domain}"
        self._auth = (admin_user, admin_password)

    def user_exists(self, username: str) -> bool:
        """Return True when *username* exists on this Nextcloud instance."""
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v1.php/cloud/users/{username}",
                params={"format": "json"},
                auth=self._auth,
                headers=OCS_HEADERS,
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return False
            # BUGFIX: v1 returns HTTP 200 for unknown users too; the status
            # code alone made this method always True.  Check the OCS meta
            # statuscode instead (100 == success).
            meta = resp.json().get("ocs", {}).get("meta", {})
            return meta.get("statuscode") == 100
        except Exception:
            return False

    def create_user(self, username: str, password: Optional[str], quota_gb: int = 20) -> bool:
        """Create a user with the given quota; True on OCS success."""
        try:
            resp = httpx.post(
                f"{self._base}/ocs/v1.php/cloud/users",
                params={"format": "json"},
                auth=self._auth,
                headers=OCS_HEADERS,
                data={
                    "userid": username,
                    "password": password or "",
                    "quota": f"{quota_gb}GB",
                },
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return False
            # BUGFIX: HTTP 200 alone does not mean success on v1 (see class
            # docstring); require OCS meta statuscode 100.
            meta = resp.json().get("ocs", {}).get("meta", {})
            return meta.get("statuscode") == 100
        except Exception as e:
            logger.error(f"NC create_user({username}) failed: {e}")
            return False

    def get_user_quota_used_gb(self, username: str) -> Optional[float]:
        """Return the user's used quota in GiB (4 dp), or None on failure."""
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v2.php/cloud/users/{username}",
                # BUGFIX: without format=json OCS answers XML and .json()
                # raised, so this method always returned None.
                params={"format": "json"},
                auth=self._auth,
                headers=OCS_HEADERS,
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return None
            used_bytes = resp.json().get("ocs", {}).get("data", {}).get("quota", {}).get("used", 0)
            return round(used_bytes / 1073741824, 4)  # bytes -> GiB
        except Exception:
            return None

    def get_total_quota_used_gb(self) -> Optional[float]:
        """Sum quota usage over all users (first 500), or None on failure."""
        try:
            resp = httpx.get(
                f"{self._base}/ocs/v2.php/cloud/users",
                # BUGFIX: request JSON explicitly (see class docstring).
                params={"limit": 500, "format": "json"},
                auth=self._auth,
                headers=OCS_HEADERS,
                timeout=TIMEOUT,
            )
            if resp.status_code != 200:
                return None
            users = resp.json().get("ocs", {}).get("data", {}).get("users", [])
            total = 0.0
            for uid in users:
                used = self.get_user_quota_used_gb(uid)
                if used:
                    total += used
            return round(total, 4)
        except Exception:
            return None
|
||||
0
backend/app/services/scheduler/__init__.py
Normal file
0
backend/app/services/scheduler/__init__.py
Normal file
59
backend/app/services/scheduler/runner.py
Normal file
59
backend/app/services/scheduler/runner.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
Schedule dispatcher: routes schedule_id to the correct run function.
|
||||
Also used by manual trigger API.
|
||||
"""
|
||||
import logging
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def dispatch_schedule(schedule_id: int, log_id: int = None, db: Session = None):
    """Dispatch *schedule_id* to the matching schedule run function.

    Called two ways:
    - From the watchdog: *db* and *log_id* are provided; the watchdog owns
      the session and the ScheduleLog lifecycle.
    - From the manual-trigger API: creates its own session and ScheduleLog
      and is then responsible for finalizing that log.
    """
    own_db = db is None
    if own_db:
        db = SessionLocal()

    own_log = log_id is None  # we must finalize the log if we create it
    log = None
    if own_log:
        from datetime import datetime
        from app.models.schedule import ScheduleLog, Schedule
        schedule = db.get(Schedule, schedule_id)
        if not schedule:
            if own_db:
                db.close()
            return
        log = ScheduleLog(
            schedule_id=schedule_id,
            schedule_name=schedule.name,
            started_at=datetime.utcnow(),
            status="running",
        )
        db.add(log)
        db.commit()
        db.refresh(log)
        log_id = log.id

    final_status = "ok"
    try:
        # Lazy imports avoid circular imports between scheduler modules.
        if schedule_id == 1:
            from app.services.scheduler.schedule_tenant import run_tenant_check
            run_tenant_check(log_id, db)
        elif schedule_id == 2:
            from app.services.scheduler.schedule_account import run_account_check
            run_account_check(log_id, db)
        elif schedule_id == 3:
            from app.services.scheduler.schedule_system import run_system_status
            run_system_status(log_id, db)
        else:
            logger.warning(f"Unknown schedule_id: {schedule_id}")
    except Exception as e:
        final_status = "error"
        logger.exception(f"dispatch_schedule({schedule_id}) error: {e}")
        raise
    finally:
        # BUGFIX: on the manual path the self-created log previously stayed
        # "running" forever — nothing downstream updates it.  Close it out
        # here; the watchdog finalizes the logs it creates itself.
        if own_log and log is not None:
            from datetime import datetime
            log.ended_at = datetime.utcnow()
            log.status = final_status
            db.commit()
        if own_db:
            db.close()
|
||||
103
backend/app/services/scheduler/schedule_account.py
Normal file
103
backend/app/services/scheduler/schedule_account.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Schedule 2 — 帳號檢查(每 3 分鐘)
|
||||
檢查每個 active 帳號的: SSO使用者 / Mailbox / NC使用者 / Quota
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.account import Account
|
||||
from app.models.result import AccountScheduleResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_account_check(schedule_log_id: int, db: Session):
    """Schedule 2 — per-account check (every 3 minutes).

    For every active account, verifies (creating when missing):
    [1] the SSO user in the tenant's Keycloak realm,
    [2] the mailbox on the mail server,
    [3] the Nextcloud user,
    and [4] records the account's Nextcloud quota usage.
    One AccountScheduleResult row is written per account; per-step failures
    are collected into result.fail_reason instead of aborting the run.
    """
    # Lazy imports avoid circular imports at module load time.
    from app.services.keycloak_client import KeycloakClient
    from app.services.mail_client import MailClient
    from app.services.nextcloud_client import NextcloudClient

    accounts = (
        db.query(Account)
        .filter(Account.is_active == True)
        .all()
    )
    kc = KeycloakClient()
    mail = MailClient()

    for account in accounts:
        tenant = account.tenant
        realm = tenant.keycloak_realm or tenant.code
        result = AccountScheduleResult(
            schedule_log_id=schedule_log_id,
            account_id=account.id,
            sso_account=account.sso_account,
            recorded_at=datetime.utcnow(),
        )
        fail_reasons = []

        # [1] SSO user check — look up by username, create when missing,
        # and backfill account.sso_uuid the first time we learn it.
        try:
            sso_uuid = kc.get_user_uuid(realm, account.sso_account)
            if sso_uuid:
                result.sso_result = True
                result.sso_uuid = sso_uuid
                if not account.sso_uuid:
                    account.sso_uuid = sso_uuid
            else:
                sso_uuid = kc.create_user(realm, account.sso_account, account.email, account.default_password)
                result.sso_result = sso_uuid is not None
                result.sso_uuid = sso_uuid
                if sso_uuid and not account.sso_uuid:
                    account.sso_uuid = sso_uuid
            result.sso_done_at = datetime.utcnow()
        except Exception as e:
            result.sso_result = False
            result.sso_done_at = datetime.utcnow()
            fail_reasons.append(f"sso: {e}")

        # [2] Mailbox check — falls back to sso_account@tenant.domain when
        # the account has no explicit email.
        try:
            email = account.email or f"{account.sso_account}@{tenant.domain}"
            mb_exists = mail.mailbox_exists(email)
            if mb_exists:
                result.mailbox_result = True
            else:
                created = mail.create_mailbox(email, account.default_password, account.quota_limit)
                result.mailbox_result = created
            result.mailbox_done_at = datetime.utcnow()
        except Exception as e:
            result.mailbox_result = False
            result.mailbox_done_at = datetime.utcnow()
            fail_reasons.append(f"mailbox: {e}")

        # [3] NC user check — create with the account's quota when missing.
        try:
            nc = NextcloudClient(tenant.domain)
            nc_exists = nc.user_exists(account.sso_account)
            if nc_exists:
                result.nc_result = True
            else:
                created = nc.create_user(account.sso_account, account.default_password, account.quota_limit)
                result.nc_result = created
            result.nc_done_at = datetime.utcnow()
        except Exception as e:
            result.nc_result = False
            result.nc_done_at = datetime.utcnow()
            fail_reasons.append(f"nc: {e}")

        # [4] Quota — best effort; a failure is only logged, not recorded
        # as a fail_reason.
        try:
            nc = NextcloudClient(tenant.domain)
            result.quota_usage = nc.get_user_quota_used_gb(account.sso_account)
        except Exception as e:
            logger.warning(f"Quota check failed for {account.account_code}: {e}")

        if fail_reasons:
            result.fail_reason = "; ".join(fail_reasons)

        db.add(result)

    # Single commit for the whole batch (also persists sso_uuid backfills).
    # BUGFIX: dropped the db.flush() that followed commit — flushing an
    # already-committed session is a no-op.
    db.commit()
    logger.info(f"Account check done: {len(accounts)} accounts processed")
|
||||
94
backend/app/services/scheduler/schedule_system.py
Normal file
94
backend/app/services/scheduler/schedule_system.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Schedule 3 — 系統狀態(每日 08:00)
|
||||
Part A: 基礎設施服務功能驗證(traefik/keycloak/mail/db)
|
||||
Part B: 伺服器 ping 檢查
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.server import SystemStatusLog, ServerStatusLog, Server
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Fixed 8 services: environment × service_name
|
||||
# Fixed list of 8 infrastructure services (environment × service_name).
# The key shape varies by service type and must match run_system_status:
#   traefik / mail -> "host" + "port"
#   keycloak       -> "url" + "realm"
#   db             -> "db_host" + "db_port"
# service_desc strings are operator-facing and kept verbatim.
SERVICES = [
    {"environment": "test", "service_name": "traefik",
     "service_desc": "測試環境反向代理", "host": "localhost", "port": 8080},
    {"environment": "test", "service_name": "keycloak",
     "service_desc": "測試環境 SSO",
     "url": "https://auth.lab.taipei", "realm": "master"},
    {"environment": "test", "service_name": "mail",
     "service_desc": "測試環境 Mail Server", "host": "localhost", "port": 587},
    {"environment": "test", "service_name": "db",
     "service_desc": "10.1.0.20:5433 PostgreSQL",
     "db_host": "10.1.0.20", "db_port": 5433},
    {"environment": "prod", "service_name": "traefik",
     "service_desc": "正式環境反向代理", "host": "localhost", "port": 8080},
    {"environment": "prod", "service_name": "keycloak",
     "service_desc": "正式環境 SSO",
     "url": "https://auth.ease.taipei", "realm": "master"},
    {"environment": "prod", "service_name": "mail",
     "service_desc": "正式環境 Mail Server", "host": "10.1.0.254", "port": 587},
    {"environment": "prod", "service_name": "db",
     "service_desc": "10.1.0.254:5432 PostgreSQL",
     "db_host": "10.1.0.254", "db_port": 5432},
]
|
||||
|
||||
|
||||
def run_system_status(schedule_log_id: int, db: Session):
    """Schedule 3 — daily system status.

    Part A writes one SystemStatusLog per fixed infrastructure service in
    SERVICES; Part B writes one ServerStatusLog (ping latency) per active
    server.  A single commit persists the whole batch.
    """
    from app.services.system_checker import SystemChecker
    checker = SystemChecker()

    # Part A: infrastructure services (functional checks, not handshakes).
    for svc in SERVICES:
        ok = False
        reason = None
        name = svc["service_name"]
        try:
            if name == "traefik":
                ok = checker.check_traefik(svc["host"], svc["port"])
            elif name == "keycloak":
                ok = checker.check_keycloak(svc["url"], svc["realm"])
            elif name == "mail":
                ok = checker.check_smtp(svc["host"], svc["port"])
            elif name == "db":
                ok = checker.check_postgres(svc["db_host"], svc["db_port"])
        except Exception as e:
            ok = False
            reason = str(e)

        db.add(SystemStatusLog(
            schedule_log_id=schedule_log_id,
            environment=svc["environment"],
            service_name=name,
            service_desc=svc["service_desc"],
            result=ok,
            fail_reason=reason,
            recorded_at=datetime.utcnow(),
        ))

    # Part B: ping every active server, in display order.
    servers = db.query(Server).filter(Server.is_active == True).order_by(Server.sort_order).all()
    for server in servers:
        latency = None
        reason = None
        try:
            latency = checker.ping_server(server.ip_address)
            ok = latency is not None
            if not ok:
                reason = "No response"
        except Exception as e:
            ok = False
            reason = str(e)

        db.add(ServerStatusLog(
            schedule_log_id=schedule_log_id,
            server_id=server.id,
            result=ok,
            response_time=latency,
            fail_reason=reason,
            recorded_at=datetime.utcnow(),
        ))

    db.commit()
    logger.info(f"System status check done: {len(SERVICES)} services + {len(servers)} servers")
|
||||
110
backend/app/services/scheduler/schedule_tenant.py
Normal file
110
backend/app/services/scheduler/schedule_tenant.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Schedule 1 — 租戶檢查(每 3 分鐘)
|
||||
檢查每個 active 租戶的: Traefik路由 / SSO Realm / Mailbox Domain / NC容器 / OO容器 / Quota
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.result import TenantScheduleResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_tenant_check(schedule_log_id: int, db: Session):
    """Schedule 1 — per-tenant check (every 3 minutes).

    For every active tenant, verifies: [1] Traefik route, [2] SSO realm
    (created when missing), [3] mail domain (gated on MX DNS for active
    tenants), [4] NC container, [5] OnlyOffice container, [6] total NC
    quota usage.  One TenantScheduleResult row is written per tenant.
    """
    # Lazy imports avoid circular imports at module load time.
    from app.services.keycloak_client import KeycloakClient
    from app.services.mail_client import MailClient
    from app.services.docker_client import DockerClient
    from app.services.nextcloud_client import NextcloudClient

    tenants = db.query(Tenant).filter(Tenant.is_active == True).all()
    kc = KeycloakClient()
    mail = MailClient()
    docker = DockerClient()

    for tenant in tenants:
        realm = tenant.keycloak_realm or tenant.code
        result = TenantScheduleResult(
            schedule_log_id=schedule_log_id,
            tenant_id=tenant.id,
            recorded_at=datetime.utcnow(),
        )
        fail_reasons = []

        # [1] Traefik route for the tenant's domain
        try:
            result.traefik_status = docker.check_traefik_route(tenant.domain)
            result.traefik_done_at = datetime.utcnow()
        except Exception as e:
            result.traefik_status = False
            result.traefik_done_at = datetime.utcnow()
            fail_reasons.append(f"traefik: {e}")

        # [2] SSO realm — create when missing; success means "exists now".
        try:
            exists = kc.realm_exists(realm)
            if not exists:
                kc.create_realm(realm, tenant.name)
            result.sso_result = True
            result.sso_done_at = datetime.utcnow()
        except Exception as e:
            result.sso_result = False
            result.sso_done_at = datetime.utcnow()
            fail_reasons.append(f"sso: {e}")

        # [3] Mailbox domain (with MX DNS gate for active tenants)
        try:
            if tenant.status == "active":
                dns_ok = mail.check_mx_dns(tenant.domain)
                if not dns_ok:
                    result.mailbox_result = False
                    result.mailbox_done_at = datetime.utcnow()
                    fail_reasons.append("mailbox: MX record not configured")
                    # BUGFIX: persist collected fail reasons before the early
                    # exit — previously this path saved the row with
                    # fail_reason unset, losing traefik/sso failures too.
                    result.fail_reason = "; ".join(fail_reasons)
                    db.add(result)
                    db.commit()
                    continue
            domain_exists = mail.domain_exists(tenant.domain)
            if not domain_exists:
                mail.create_domain(tenant.domain)
            result.mailbox_result = True
            result.mailbox_done_at = datetime.utcnow()
        except Exception as e:
            result.mailbox_result = False
            result.mailbox_done_at = datetime.utcnow()
            fail_reasons.append(f"mailbox: {e}")

        # [4] Nextcloud container
        try:
            nc_name = f"nc-{realm}"
            result.nc_result = docker.ensure_container_running(nc_name, tenant.code, realm)
            result.nc_done_at = datetime.utcnow()
        except Exception as e:
            result.nc_result = False
            result.nc_done_at = datetime.utcnow()
            fail_reasons.append(f"nc: {e}")

        # [5] OnlyOffice container
        try:
            oo_name = f"oo-{realm}"
            result.office_result = docker.ensure_container_running(oo_name, tenant.code, realm)
            result.office_done_at = datetime.utcnow()
        except Exception as e:
            result.office_result = False
            result.office_done_at = datetime.utcnow()
            fail_reasons.append(f"office: {e}")

        # [6] Quota — best effort; failures are only logged.
        try:
            nc = NextcloudClient(tenant.domain)
            result.quota_usage = nc.get_total_quota_used_gb()
        except Exception as e:
            logger.warning(f"Quota check failed for {tenant.code}: {e}")

        if fail_reasons:
            result.fail_reason = "; ".join(fail_reasons)

        db.add(result)

    db.commit()
    logger.info(f"Tenant check done: {len(tenants)} tenants processed")
|
||||
107
backend/app/services/scheduler/watchdog.py
Normal file
107
backend/app/services/scheduler/watchdog.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
Watchdog: APScheduler BackgroundScheduler,每 3 分鐘掃描 schedules 表。
|
||||
防重複執行:原子 UPDATE status='Going',影響 0 筆則跳過。
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from croniter import croniter
|
||||
from sqlalchemy import update
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
from app.models.schedule import Schedule, ScheduleLog
|
||||
|
||||
logger = logging.getLogger(__name__)

# Single process-wide scheduler.  Jobs run in Asia/Taipei local time.
# NOTE(review): the tick below compares next_run_at against naive
# datetime.utcnow() — confirm which timezone next_run_at is stored in.
_scheduler = BackgroundScheduler(timezone="Asia/Taipei")
|
||||
|
||||
|
||||
def _watchdog_tick():
    """One watchdog pass: find due schedules, lock, run, and reschedule.

    Duplicate-run protection: an atomic UPDATE flips the row from
    'Waiting' to 'Going'; a rowcount of 0 means another process already
    claimed the schedule and this one skips it.
    """
    db: Session = SessionLocal()
    try:
        # Schedules that are idle and past their next_run_at are due.
        due = (
            db.query(Schedule)
            .filter(
                Schedule.status == "Waiting",
                Schedule.next_run_at <= datetime.utcnow(),
            )
            .all()
        )
        for schedule in due:
            # Atomic lock: only one process wins
            affected = db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id, Schedule.status == "Waiting")
                .values(status="Going")
            ).rowcount
            db.commit()

            if affected == 0:
                # Another process already grabbed it
                continue

            # Create the run log BEFORE dispatching so a crash leaves a trace.
            log = ScheduleLog(
                schedule_id=schedule.id,
                schedule_name=schedule.name,
                started_at=datetime.utcnow(),
                status="running",
            )
            db.add(log)
            db.commit()
            db.refresh(log)

            try:
                # Imported lazily to avoid a circular import at module load.
                from app.services.scheduler.runner import dispatch_schedule
                dispatch_schedule(schedule.id, log.id, db)
                final_status = "ok"
            except Exception as e:
                logger.exception(f"Schedule {schedule.name} failed: {e}")
                final_status = "error"

            # Update log (flushed by the commit below)
            log.ended_at = datetime.utcnow()
            log.status = final_status

            # Recalculate next_run_at.
            # NOTE(review): seed data documents the cron strings as
            # "sec min hour day month weekday" (seconds first), but croniter
            # is called here with its default field order — confirm the two
            # agree.  Also utcnow() is naive UTC while the scheduler runs in
            # Asia/Taipei — confirm intended timezone handling.
            try:
                cron = croniter(schedule.cron_timer, datetime.utcnow())
                next_run = cron.get_next(datetime)
            except Exception:
                next_run = None

            # Reset schedule so the next tick can pick it up again.
            db.execute(
                update(Schedule)
                .where(Schedule.id == schedule.id)
                .values(
                    status="Waiting",
                    last_run_at=datetime.utcnow(),
                    next_run_at=next_run,
                    last_status=final_status,
                )
            )
            db.commit()

    except Exception as e:
        logger.exception(f"Watchdog tick error: {e}")
        db.rollback()
    finally:
        db.close()
|
||||
|
||||
|
||||
def start_watchdog():
    """Register the watchdog tick (every 3 minutes) and start APScheduler."""
    job_options = {
        "trigger": "interval",
        "minutes": 3,
        "id": "watchdog",
        "replace_existing": True,
    }
    _scheduler.add_job(_watchdog_tick, **job_options)
    _scheduler.start()
    logger.info("Watchdog scheduler started")
|
||||
|
||||
|
||||
def stop_watchdog():
    """Shut the scheduler down without waiting for in-flight jobs."""
    _scheduler.shutdown(wait=False)
    logger.info("Watchdog scheduler stopped")
|
||||
49
backend/app/services/seed.py
Normal file
49
backend/app/services/seed.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""Initial data seed: schedules + servers"""
|
||||
from datetime import datetime
|
||||
from croniter import croniter
|
||||
from sqlalchemy.orm import Session
|
||||
from app.models.schedule import Schedule
|
||||
from app.models.server import Server
|
||||
|
||||
|
||||
# Three fixed schedules.  Cron strings are 6-field with seconds FIRST
# ("sec min hour day month weekday") — see _calc_next_run below.
INITIAL_SCHEDULES = [
    {"id": 1, "name": "租戶檢查", "cron_timer": "0 */3 * * * *"},
    {"id": 2, "name": "帳號檢查", "cron_timer": "0 */3 * * * *"},
    {"id": 3, "name": "系統狀態", "cron_timer": "0 0 8 * * *"},
]

# Four known servers pinged by the daily system-status schedule;
# sort_order controls display order in the UI.
INITIAL_SERVERS = [
    {"id": 1, "name": "home", "ip_address": "10.1.0.254", "sort_order": 1,
     "description": "核心服務主機 (Ubuntu 24.04 / Dell Inspiron 3910)"},
    {"id": 2, "name": "小的NAS", "ip_address": "10.1.0.20", "sort_order": 2,
     "description": "資料庫主機 (Synology DS716+II / DSM 6.2.4)"},
    {"id": 3, "name": "大的NAS", "ip_address": "10.1.0.30", "sort_order": 3,
     "description": "儲存主機 (Synology DS920+ / DSM 7.3.2)"},
    {"id": 4, "name": "Porsche_KLI", "ip_address": "10.1.0.245", "sort_order": 4,
     "description": "開發環境 (ASUS MINIPC PN62 / Windows 11)"},
]
|
||||
|
||||
|
||||
def _calc_next_run(cron_timer: str) -> datetime:
    """Compute the next fire time (UTC) for a 6-field cron expression.

    This project writes expressions as "sec min hour day month weekday"
    (seconds FIRST, e.g. "0 0 8 * * *" == daily at 08:00:00).
    """
    # BUGFIX: croniter's default for 6-field expressions puts seconds LAST,
    # so "0 0 8 * * *" would parse as min=0 hour=0 day-of-month=8 — every
    # 8th of the month at midnight instead of daily 08:00.  The
    # second_at_beginning flag makes croniter match this project's format.
    cron = croniter(cron_timer, datetime.utcnow(), second_at_beginning=True)
    return cron.get_next(datetime)
|
||||
|
||||
|
||||
def seed_initial_data(db: Session) -> None:
    """Idempotently insert the initial schedules and servers.

    Rows that already exist (looked up by primary key) are left untouched;
    a single commit persists whatever was added.
    """
    for row in INITIAL_SCHEDULES:
        if db.get(Schedule, row["id"]):
            continue
        db.add(Schedule(
            id=row["id"],
            name=row["name"],
            cron_timer=row["cron_timer"],
            status="Waiting",
            next_run_at=_calc_next_run(row["cron_timer"]),
        ))

    for row in INITIAL_SERVERS:
        if db.get(Server, row["id"]):
            continue
        db.add(Server(**row))

    db.commit()
|
||||
105
backend/app/services/system_checker.py
Normal file
105
backend/app/services/system_checker.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
SystemChecker — 功能驗證(不只 handshake)
|
||||
traefik: routers > 0 / keycloak: token 取得 / mail: EHLO / db: SELECT 1 / server: ping
|
||||
"""
|
||||
import logging
|
||||
import smtplib
|
||||
from typing import Optional
|
||||
import httpx
|
||||
import psycopg2
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SystemChecker:
    """Functional health checks for infrastructure services.

    Each check exercises the service, not just the TCP handshake:
    traefik -> router count > 0, keycloak -> token grant, mail -> EHLO,
    db -> SELECT 1, server -> ICMP ping (TCP/22 fallback).
    """

    def check_traefik(self, host: str = "localhost", port: int = 8080) -> bool:
        """Traefik API: /api/overview reachable and HTTP router count > 0."""
        try:
            resp = httpx.get(f"http://{host}:{port}/api/overview", timeout=5.0)
            if resp.status_code != 200:
                return False
            data = resp.json()
            total_routers = data.get("http", {}).get("routers", {}).get("total", 0)
            return total_routers > 0
        except Exception as e:
            logger.warning(f"Traefik check failed: {e}")
            return False

    def check_keycloak(self, base_url: str, realm: str = "master") -> bool:
        """Two-step Keycloak check.

        Step 1: GET /realms/{realm} must answer 200.
        Step 2: a client-credentials token grant against the configured
        admin realm must yield an access_token (proves auth really works).
        """
        try:
            resp = httpx.get(f"{base_url}/realms/{realm}", timeout=8.0)
            if resp.status_code != 200:
                return False
            # Functional check: get admin token
            token_resp = httpx.post(
                f"{base_url}/realms/{settings.KEYCLOAK_ADMIN_REALM}/protocol/openid-connect/token",
                data={
                    "grant_type": "client_credentials",
                    "client_id": settings.KEYCLOAK_ADMIN_CLIENT_ID,
                    "client_secret": settings.KEYCLOAK_ADMIN_CLIENT_SECRET,
                },
                timeout=8.0,
            )
            return token_resp.status_code == 200 and "access_token" in token_resp.json()
        except Exception as e:
            logger.warning(f"Keycloak check failed ({base_url}): {e}")
            return False

    def check_smtp(self, host: str, port: int = 587) -> bool:
        """SMTP connect + EHLO (functional protocol check)."""
        try:
            with smtplib.SMTP(host, port, timeout=8) as smtp:
                smtp.ehlo()
                return True
        except Exception as e:
            logger.warning(f"SMTP check failed ({host}:{port}): {e}")
            return False

    def check_postgres(
        self,
        host: str,
        port: int = 5432,
        dbname: str = "postgres",
        user: str = "admin",
        password: str = "DC1qaz2wsx",
    ) -> bool:
        """psycopg2 connect + SELECT 1.

        Credentials are now parameters with backward-compatible defaults.
        SECURITY: the default password was hard-coded in the original
        implementation and is kept only for compatibility — move it into
        settings/environment configuration and stop committing it.
        """
        try:
            conn = psycopg2.connect(
                host=host, port=port, dbname=dbname,
                user=user, password=password,
                connect_timeout=8,
            )
            try:
                cur = conn.cursor()
                cur.execute("SELECT 1")
                result = cur.fetchone()
            finally:
                # BUGFIX: close the connection even if the query raises;
                # previously it leaked on that path.
                conn.close()
            return result == (1,)
        except Exception as e:
            logger.warning(f"PostgreSQL check failed ({host}:{port}): {e}")
            return False

    def ping_server(self, ip_address: str) -> Optional[float]:
        """ICMP ping; returns latency in ms, or None when unreachable.

        Falls back to a timed TCP connect on port 22 when raw-socket ping
        needs root privileges (PermissionError).
        """
        try:
            import ping3
            result = ping3.ping(ip_address, timeout=3)
            if result is not None and result is not False:
                return round(result * 1000, 2)  # seconds -> ms
        except PermissionError:
            # Fallback: TCP connect to port 22
            import socket
            import time
            try:
                start = time.time()
                sock = socket.create_connection((ip_address, 22), timeout=3)
                sock.close()
                return round((time.time() - start) * 1000, 2)
            except Exception:
                pass
        except Exception as e:
            logger.warning(f"Ping failed for {ip_address}: {e}")
        return None
|
||||
Reference in New Issue
Block a user