""" Schedule 1 — 租戶檢查(每 3 分鐘) 3-state: None=未設定(灰), True=正常(綠), False=異常(紅) - None → 自動嘗試建立/部署,記錄 done_at - False → 發送告警 email 給所有管理員 """ import logging import smtplib from email.mime.text import MIMEText from typing import Optional import httpx from sqlalchemy.orm import Session from app.core.config import settings from app.core.utils import now_tw from app.models.tenant import Tenant from app.models.result import TenantScheduleResult logger = logging.getLogger(__name__) PG_PORT_TRIAL = 5433 PG_PORT_ACTIVE = 5432 PG_USER = "admin" PG_PASS = "DC1qaz2wsx" PG_HOST_TRIAL = "10.1.0.20" PG_HOST_ACTIVE = "10.1.0.254" REDIS_HOST_TRIAL = "10.14.0.20" REDIS_HOST_ACTIVE = "10.1.0.254" REDIS_PORT = 6379 REDIS_PASS = "DC1qaz2wsx" KC_HOST_TRIAL = "auth.lab.taipei" KC_HOST_ACTIVE = "auth.ease.taipei" TRAEFIK_DYNAMIC_DIR = "/home/porsche/traefik/dynamic" TRAEFIK_API_URL = "http://10.1.0.254:8080" # ─── Docker Compose 範本產生 ───────────────────────────────────────────────── OO_AI_PLUGIN_GUID = "{9DC93CDB-B576-4F0C-B55E-FCC9C48DD007}" OO_AI_TRANSLATIONS_HOST = "/home/porsche/tenants/shared/oo-plugins/ai-translations" OO_AI_TRANSLATIONS_CONTAINER = ( f"/var/www/onlyoffice/documentserver/sdkjs-plugins/{OO_AI_PLUGIN_GUID}/translations" ) def _generate_tenant_compose(tenant, is_active: bool) -> str: """ 產生租戶 docker-compose.yml 內容。 包含 NC + OO 容器設定,OO 已加入繁中 AI plugin bind mount。 """ code = tenant.code suffix = "" if is_active else "-test" nc = f"nc-{code}{suffix}" oo = f"oo-{code}{suffix}" pg_host = PG_HOST_ACTIVE if is_active else PG_HOST_TRIAL pg_port = PG_PORT_ACTIVE if is_active else PG_PORT_TRIAL pg_db = f"nc_{code}_db" nc_domain = tenant.domain oo_host = f"office-{code}.ease.taipei" if is_active else f"office-{code}.lab.taipei" ai_base = OO_AI_TRANSLATIONS_HOST ai_cont = OO_AI_TRANSLATIONS_CONTAINER return f"""services: {nc}: image: nextcloud:31 container_name: {nc} restart: unless-stopped volumes: - {nc}-data:/var/www/html - {nc}-apps:/var/www/html/custom_apps - 
{nc}-config:/var/www/html/config environment: POSTGRES_HOST: {pg_host}:{pg_port} POSTGRES_DB: {pg_db} POSTGRES_USER: {PG_USER} POSTGRES_PASSWORD: ${{NC_DB_PASSWORD}} NEXTCLOUD_ADMIN_USER: ${{NC_ADMIN_USER}} NEXTCLOUD_ADMIN_PASSWORD: ${{NC_ADMIN_PASSWORD}} NEXTCLOUD_TRUSTED_DOMAINS: {nc_domain} OVERWRITEPROTOCOL: https TRUSTED_PROXIES: 172.18.0.0/16 TZ: Asia/Taipei networks: - traefik-network labels: - "traefik.enable=false" {oo}: image: onlyoffice/documentserver:latest container_name: {oo} restart: unless-stopped environment: - JWT_SECRET=${{OO_JWT_SECRET}} volumes: - {oo}-data:/var/www/onlyoffice/Data - {oo}-log:/var/log/onlyoffice - {ai_base}/zh-TW.json:{ai_cont}/zh-TW.json:ro - {ai_base}/zh-TW.json.gz:{ai_cont}/zh-TW.json.gz:ro - {ai_base}/langs.json:{ai_cont}/langs.json:ro - {ai_base}/langs.json.gz:{ai_cont}/langs.json.gz:ro networks: - traefik-network labels: - "traefik.enable=true" - "traefik.docker.network=traefik-network" - "traefik.http.routers.{oo}.rule=Host(`{oo_host}`)" - "traefik.http.routers.{oo}.entrypoints=websecure" - "traefik.http.routers.{oo}.tls=true" - "traefik.http.routers.{oo}.tls.certresolver=letsencrypt" - "traefik.http.services.{oo}.loadbalancer.server.port=80" - "traefik.http.middlewares.{oo}-headers.headers.customrequestheaders.X-Forwarded-Proto=https" - "traefik.http.routers.{oo}.middlewares={oo}-headers" networks: traefik-network: external: true volumes: {nc}-data: {nc}-apps: {nc}-config: {oo}-data: {oo}-log: """ def _ensure_tenant_compose(tenant, is_active: bool) -> bool: """ 確保租戶 docker-compose.yml 存在且為最新範本。 若不存在則自動產生並寫入,同時補齊 .env。 """ try: import paramiko code = tenant.code deploy_dir = f"{settings.TENANT_DEPLOY_BASE}/{code}" compose_path = f"{deploy_dir}/docker-compose.yml" env_path = f"{deploy_dir}/.env" client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) # 確保目錄存在 _, out, _ = client.exec_command(f"mkdir -p 
{deploy_dir}") out.channel.recv_exit_status() # 若 docker-compose.yml 不存在才寫入(避免覆蓋手動調整) _, stdout, _ = client.exec_command(f"test -f {compose_path} && echo exists || echo missing") exists = stdout.read().decode().strip() == "exists" if not exists: content = _generate_tenant_compose(tenant, is_active) sftp = client.open_sftp() with sftp.open(compose_path, "w") as f: f.write(content) sftp.close() logger.info(f"docker-compose.yml generated for {code}") # 確保 .env 存在(含必要變數) _, stdout2, _ = client.exec_command(f"test -f {env_path} && echo exists || echo missing") env_exists = stdout2.read().decode().strip() == "exists" if not env_exists: from app.core.config import settings as _cfg env_content = ( f"NC_DB_PASSWORD={PG_PASS}\n" f"NC_ADMIN_USER={_cfg.NC_ADMIN_USER}\n" f"NC_ADMIN_PASSWORD={_cfg.NC_ADMIN_PASSWORD}\n" f"OO_JWT_SECRET={_cfg.OO_JWT_SECRET}\n" ) sftp2 = client.open_sftp() with sftp2.open(env_path, "w") as f: f.write(env_content) sftp2.close() logger.info(f".env generated for {code}") client.close() return True except Exception as e: logger.error(f"_ensure_tenant_compose {tenant.code}: {e}") return False # ─── Traefik file provider helpers ─────────────────────────────────────────── def _generate_tenant_route_yaml(tenant, is_active: bool) -> str: """ 產生租戶 Traefik 路由 YAML 內容。 租戶網域根路徑直接指向 NC(無路徑前綴)。 is_manager=true 的租戶額外加入 /admin 路由(priority 200,比 drive 高)。 """ code = tenant.code domain = tenant.domain nc_url = f"http://nc-{code}:80" if is_active else f"http://nc-{code}-test:80" lines = ["http:"] if tenant.is_manager: lines += [ " middlewares:", " vmis-strip-admin:", " stripPrefix:", ' prefixes: ["/admin"]', "", ] lines += [" routers:"] if tenant.is_manager: lines += [ f" {code}-admin:", f' rule: "Host(`{domain}`) && PathPrefix(`/admin`)"', f" service: {code}-vmis", " entryPoints: [websecure]", " middlewares: [vmis-strip-admin]", " tls:", " certResolver: letsencrypt", " priority: 200", "", f" {code}-api:", f' rule: "Host(`{domain}`) && PathPrefix(`/api`)"', f" 
service: {code}-vmis", " entryPoints: [websecure]", " tls:", " certResolver: letsencrypt", " priority: 200", "", ] lines += [ f" {code}-drive:", f' rule: "Host(`{domain}`)"', f" service: {code}-drive", " entryPoints: [websecure]", " tls:", " certResolver: letsencrypt", "", f" {code}-http:", f' rule: "Host(`{domain}`)"', " entryPoints: [web]", " middlewares: [redirect-https]", f" service: {code}-drive", "", " services:", f" {code}-drive:", " loadBalancer:", " servers:", f' - url: "{nc_url}"', ] if tenant.is_manager: lines += [ f" {code}-vmis:", " loadBalancer:", " servers:", ' - url: "http://vmis-backend:10281"', ] return "\n".join(lines) + "\n" def _ensure_traefik_routes(tenant, is_active: bool) -> bool: """ 確保租戶 Traefik 路由檔案存在且內容正確。 使用 SFTP 寫入 /home/porsche/traefik/dynamic/{code}.yml,Traefik 熱重載。 回傳 True=路由已生效, False=失敗 """ import time try: import paramiko code = tenant.code file_path = f"{TRAEFIK_DYNAMIC_DIR}/{code}.yml" expected = _generate_tenant_route_yaml(tenant, is_active) client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) sftp = client.open_sftp() needs_write = True try: with sftp.open(file_path, "r") as f: existing = f.read().decode() if existing == expected: needs_write = False logger.info(f"Traefik route {code}.yml: already correct") except FileNotFoundError: logger.info(f"Traefik route {code}.yml: not found, creating") if needs_write: with sftp.open(file_path, "w") as f: f.write(expected) logger.info(f"Traefik route {code}.yml: written") sftp.close() client.close() # 驗證 Traefik 已載入路由(最多等待 4 秒) route_name = f"{code}-drive@file" for attempt in range(2): try: resp = httpx.get( f"{TRAEFIK_API_URL}/api/http/routers/{route_name}", timeout=5.0, ) if resp.status_code == 200 and resp.json().get("status") == "enabled": logger.info(f"Traefik route {route_name}: enabled ✓") return True except Exception: pass if attempt == 0: time.sleep(2) 
logger.warning(f"Traefik route {route_name}: not visible in API after write") return False except Exception as e: logger.error(f"_ensure_traefik_routes {tenant.code}: {e}") return False # ─── Keycloak helpers ──────────────────────────────────────────────────────── def _check_kc_realm(host: str, realm: str) -> Optional[bool]: """ None = realm 不存在(未設定) True = realm 存在且可連線 False = 連線失敗 """ try: resp = httpx.get( f"https://{host}/realms/{realm}/.well-known/openid-configuration", timeout=5.0, ) if resp.status_code == 200: return True if resp.status_code == 404: return None return False except Exception as e: logger.warning(f"KC realm check failed {host}/{realm}: {e}") return False def _create_kc_realm(realm: str, tenant_name: str): from app.services.keycloak_client import get_keycloak_client kc = get_keycloak_client() kc.create_realm(realm, tenant_name) kc.update_realm_token_settings(realm, access_code_lifespan=600) def _ensure_kc_drive_client(realm: str, domain: str) -> Optional[str]: """ 確保 Keycloak realm 中存在 'drive' confidential client(NC OIDC 用)。 回傳 client_secret,若失敗回傳 None。 """ try: from app.services.keycloak_client import get_keycloak_client kc = get_keycloak_client() # redirectUri 為根路徑(NC 直接服務租戶網域,無路徑前綴) status = kc.create_confidential_client( realm, "drive", [f"https://{domain}/*"] ) if status in ("exists", "created"): return kc.get_client_secret(realm, "drive") logger.error(f"Failed to ensure drive client in realm {realm}: {status}") return None except Exception as e: logger.error(f"_ensure_kc_drive_client {realm}: {e}") return None def _nc_db_check(container_name: str, pg_host: str, pg_db: str, nc_domain: str, pg_port: int = PG_PORT_TRIAL) -> bool: """ 驗證 NC 是否正確安裝並使用 PostgreSQL。 若偵測到 SQLite(常見於 volumes 持久但重新部署的情況),自動修復: 1. 刪除 config.php 2. 清空 PostgreSQL DB schema 3. 清除 data 目錄中的 SQLite 檔案 4. 重新執行 occ maintenance:install 使用 pgsql 5. 
設定 overwritehost 回傳 True=已正確使用 pgsql, False=仍有問題 """ try: import paramiko, json as _json, psycopg2 client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) # 取得 NC 安裝狀態 _, stdout, _ = client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ status --output=json 2>/dev/null" ) status_raw = stdout.read().decode().strip() installed = False try: installed = _json.loads(status_raw).get("installed", False) except Exception: pass if not installed: logger.info(f"NC {container_name}: not installed yet, installing with pgsql...") # 重置 PostgreSQL DB schema(確保乾淨狀態) conn = psycopg2.connect( host=pg_host, port=pg_port, dbname=pg_db, user=PG_USER, password=PG_PASS, connect_timeout=5, ) conn.autocommit = True cur = conn.cursor() cur.execute("DROP SCHEMA public CASCADE; CREATE SCHEMA public; GRANT ALL ON SCHEMA public TO PUBLIC;") conn.close() logger.info(f"PostgreSQL {pg_db}@{pg_host}: schema reset for fresh install") install_cmd = ( f"docker exec -u www-data {container_name} php /var/www/html/occ maintenance:install " f"-n --admin-user admin --admin-pass NC1qaz2wsx " f"--database pgsql --database-name '{pg_db}' " f"--database-user {PG_USER} --database-pass {PG_PASS} " f"--database-host '{pg_host}:{pg_port}' 2>&1" ) _, stdout_inst, _ = client.exec_command(install_cmd) stdout_inst.channel.settimeout(120) try: install_out = stdout_inst.read().decode().strip() except Exception: install_out = "" logger.info(f"NC fresh install output: {install_out}") # 設定 overwritehost for cfg_cmd in [ f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwritehost --value={nc_domain}", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwrite.cli.url --value=https://{nc_domain}", ]: _, out_cfg, _ = client.exec_command(cfg_cmd) out_cfg.channel.settimeout(30) try: out_cfg.read() 
except Exception: pass client.close() success = "successfully installed" in install_out if success: logger.info(f"NC {container_name}: installed with pgsql ✓") else: logger.error(f"NC {container_name}: fresh install failed: {install_out}") return success # 檢查 dbtype _, stdout2, _ = client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ config:system:get dbtype 2>/dev/null" ) dbtype = stdout2.read().decode().strip() client.close() if dbtype == "pgsql": logger.info(f"NC {container_name}: already using pgsql ✓") return True # ─── 偵測到 SQLite,自動修復 ─────────────────────────────────────── logger.warning(f"NC {container_name}: dbtype={dbtype}, fixing to pgsql...") # 1. 刪除 config.php 和 SQLite 殘留資料 client2 = paramiko.SSHClient() client2.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client2.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) for cmd in [ f"docker exec {container_name} rm -f /var/www/html/config/config.php", f"docker exec {container_name} sh -c " f"'rm -rf /var/www/html/data/admin /var/www/html/data/appdata_* " f"/var/www/html/data/*.db /var/www/html/data/nextcloud.log'", ]: _, out, _ = client2.exec_command(cmd) out.channel.recv_exit_status() logger.info(f"Cleanup: {cmd}") # 2. 重置 PostgreSQL DB schema conn = psycopg2.connect( host=pg_host, port=pg_port, dbname=pg_db, user=PG_USER, password=PG_PASS, connect_timeout=5, ) conn.autocommit = True cur = conn.cursor() cur.execute("DROP SCHEMA public CASCADE; CREATE SCHEMA public; GRANT ALL ON SCHEMA public TO PUBLIC;") conn.close() logger.info(f"PostgreSQL {pg_db}@{pg_host}: schema reset") # 3. 
重新安裝 NC 使用 PostgreSQL install_cmd = ( f"docker exec -u www-data {container_name} php /var/www/html/occ maintenance:install " f"-n --admin-user admin --admin-pass NC1qaz2wsx " f"--database pgsql --database-name '{pg_db}' " f"--database-user {PG_USER} --database-pass {PG_PASS} " f"--database-host '{pg_host}:{pg_port}' 2>&1" ) _, stdout3, _ = client2.exec_command(install_cmd) stdout3.channel.recv_exit_status() install_out = stdout3.read().decode().strip() logger.info(f"NC reinstall output: {install_out}") # 4. 設定 overwritehost for cfg_cmd in [ f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwritehost --value={nc_domain}", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwrite.cli.url --value=https://{nc_domain}", ]: _, out4, _ = client2.exec_command(cfg_cmd) out4.channel.recv_exit_status() client2.close() success = "successfully installed" in install_out if success: logger.info(f"NC {container_name}: fixed to pgsql ✓") else: logger.error(f"NC {container_name}: reinstall failed: {install_out}") return success except Exception as e: logger.error(f"_nc_db_check {container_name}: {e}") return False def _nc_initialized(container_name: str) -> bool: """ 以 force_language == zh_TW 作為 NC 已完成初始化的判斷依據。 """ try: import paramiko client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) _, stdout, _ = client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ config:system:get force_language 2>/dev/null" ) result = stdout.read().decode().strip() client.close() return result == "zh_TW" except Exception as e: logger.warning(f"NC initialized check failed {container_name}: {e}") return False def _nc_initialize( container_name: str, kc_host: str, realm: str, client_secret: str, nc_domain: str, oo_container: str, oo_url: str, is_active: bool = False, ) -> bool: 
""" NC 容器首次完整初始化: Init-1: 語言設定(zh_TW) Init-2: 安裝必要 Apps(contacts / calendar / mail / onlyoffice) Init-3: OIDC Provider 設定(呼叫 _configure_nc_oidc) Init-4: SSO 強制模式(allow_multiple_user_backends=0) Init-5: OnlyOffice 整合設定 ⚠️ Init-4 必須在 Init-3 之後執行,否則無法登入 """ try: import paramiko client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) redis_host = REDIS_HOST_ACTIVE if is_active else REDIS_HOST_TRIAL # Init-1: 語言設定 for cfg_key, cfg_val in [ ("default_language", "zh_TW"), ("default_locale", "zh_TW"), ("force_language", "zh_TW"), ("force_locale", "zh_TW"), ]: _, out, _ = client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ config:system:set {cfg_key} --value={cfg_val} 2>&1" ) out.channel.recv_exit_status() # Init-1b: Redis + APCu memcache(OIDC session 持久化必須) for cfg_cmd in [ f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set memcache.local --value='\\OC\\Memcache\\APCu' 2>&1", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set memcache.distributed --value='\\OC\\Memcache\\Redis' 2>&1", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set memcache.locking --value='\\OC\\Memcache\\Redis' 2>&1", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set redis host --value={redis_host} 2>&1", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set redis port --value={REDIS_PORT} --type=integer 2>&1", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set redis password --value={REDIS_PASS} 2>&1", ]: _, out, _ = client.exec_command(cfg_cmd) out.channel.recv_exit_status() logger.info(f"NC {container_name}: Redis memcache configured (host={redis_host})") # Init-2: 安裝必要 Apps(已安裝時不影響結果) for app in ["contacts", "calendar", "mail", "onlyoffice"]: _, out, _ 
= client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ app:install {app} 2>&1" ) out.channel.recv_exit_status() text = out.read().decode().strip() logger.info(f"NC {container_name} app:install {app}: {text}") client.close() # Init-3: OIDC Provider 設定(複用現有函式) _configure_nc_oidc(container_name, kc_host, realm, client_secret, nc_domain) # Init-4: 強制 SSO(禁用本地登入)+ Init-5: OnlyOffice 整合設定 client2 = paramiko.SSHClient() client2.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client2.connect(settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15) _, out, _ = client2.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ config:app:set user_oidc allow_multiple_user_backends --value=0 2>&1" ) out.channel.recv_exit_status() # Init-5: OnlyOffice 整合設定 from app.core.config import settings as _cfg for oo_key, oo_val in [ ("DocumentServerUrl", f"{oo_url}/"), ("DocumentServerInternalUrl", f"http://{oo_container}/"), ("StorageUrl", f"https://{nc_domain}/"), ("jwt_secret", _cfg.OO_JWT_SECRET), ]: _, out, _ = client2.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ config:app:set onlyoffice {oo_key} --value='{oo_val}' 2>&1" ) out.channel.recv_exit_status() client2.close() logger.info(f"NC {container_name}: initialization complete ✓") return True except Exception as e: logger.error(f"NC initialization failed {container_name}: {e}") return False def _nc_oidc_configured(container_name: str) -> bool: """檢查 NC 容器是否已設定 OIDC provider(user_oidc app 已安裝且有 provider)""" try: import paramiko client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect( settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15, ) _, stdout, _ = client.exec_command( f"docker exec -u www-data {container_name} " f"php /var/www/html/occ user_oidc:providers 2>/dev/null | grep -q clientId && echo yes || echo no" ) result = 
stdout.read().decode().strip() client.close() return result == "yes" except Exception as e: logger.warning(f"NC OIDC check failed for {container_name}: {e}") return False def _configure_nc_oidc( container_name: str, kc_host: str, realm: str, client_secret: str, nc_domain: str ) -> bool: """ 設定 NC OIDC provider(使用 provider name='drive',對應 Keycloak client_id=drive)。 同時設定 overwritehost 確保 OIDC callback URL 正確。 """ try: import paramiko client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect( settings.DOCKER_SSH_HOST, username=settings.DOCKER_SSH_USER, timeout=15, ) discovery = f"https://{kc_host}/realms/{realm}/.well-known/openid-configuration" # 設定 overwritehost(OIDC callback 必須) for cfg_cmd in [ f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwritehost --value={nc_domain}", f"docker exec -u www-data {container_name} php /var/www/html/occ config:system:set overwrite.cli.url --value=https://{nc_domain}", ]: _, out, _ = client.exec_command(cfg_cmd) out.channel.recv_exit_status() # 啟用 user_oidc app _, out2, _ = client.exec_command( f"docker exec -u www-data {container_name} php /var/www/html/occ app:enable user_oidc 2>&1" ) out2.channel.recv_exit_status() # 設定 OIDC provider(name=drive,與 Keycloak client_id 一致) oidc_cmd = ( f"docker exec -u www-data {container_name} php /var/www/html/occ user_oidc:provider drive " f"--clientid=drive " f"--clientsecret={client_secret} " f"--discoveryuri={discovery} " f"--mapping-uid=preferred_username " f"--mapping-display-name=name " f"--mapping-email=email " f"--unique-uid=0 " f"--check-bearer=0 --send-id-token-hint=1 2>&1" ) _, stdout, _ = client.exec_command(oidc_cmd) stdout.channel.recv_exit_status() out_text = stdout.read().decode().strip() client.close() logger.info(f"NC OIDC configure output: {out_text}") return True except Exception as e: logger.error(f"NC OIDC configure failed for {container_name}: {e}") return False # ─── PostgreSQL helpers 
# ─── PostgreSQL helpers ──────────────────────────────────────────────────────

def _ensure_nc_db(host: str, dbname: str, port: int = PG_PORT_TRIAL) -> bool:
    """
    Make sure the PostgreSQL database used by NC exists and that every role may
    create objects in its `public` schema.

    The NC Docker image creates its own oc_admin* role, which needs CREATE on
    `public` beforehand.

    Returns True on success, False when any step raised (the error is logged).
    """
    try:
        import psycopg2
        from psycopg2 import sql

        # Connect to the maintenance DB to create the NC database if needed.
        conn = psycopg2.connect(
            host=host, port=port, dbname="postgres",
            user=PG_USER, password=PG_PASS,
            connect_timeout=5,
        )
        try:
            # CREATE DATABASE cannot run inside a transaction block.
            conn.autocommit = True
            cur = conn.cursor()
            cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (dbname,))
            if not cur.fetchone():
                # Identifiers cannot be bound as parameters; compose them safely.
                cur.execute(
                    sql.SQL("CREATE DATABASE {} OWNER {}").format(
                        sql.Identifier(dbname), sql.Identifier(PG_USER)
                    )
                )
                logger.info(f"Created database {dbname}@{host}")
        finally:
            conn.close()

        # Grant privileges on the public schema to all roles (for NC oc_admin*).
        conn2 = psycopg2.connect(
            host=host, port=port, dbname=dbname,
            user=PG_USER, password=PG_PASS,
            connect_timeout=5,
        )
        try:
            conn2.autocommit = True
            cur2 = conn2.cursor()
            cur2.execute("GRANT ALL ON SCHEMA public TO PUBLIC")
        finally:
            conn2.close()
        return True
    except Exception as e:
        logger.error(f"_ensure_nc_db {dbname}@{host}: {e}")
        return False


def _get_pg_db_size_gb(host: str, dbname: str, port: int = PG_PORT_TRIAL) -> Optional[float]:
    """Return the size of *dbname* in GiB rounded to 3 decimals, or None on error."""
    try:
        import psycopg2

        conn = psycopg2.connect(
            host=host, port=port, dbname=dbname,
            user=PG_USER, password=PG_PASS,
            connect_timeout=5,
        )
        try:
            cur = conn.cursor()
            cur.execute("SELECT pg_database_size(%s)", (dbname,))
            size_bytes = cur.fetchone()[0]
            cur.close()
        finally:
            conn.close()
        return round(size_bytes / (1024 ** 3), 3)
    except Exception as e:
        logger.warning(f"PG size check failed {dbname}@{host}: {e}")
        return None


# ─── Email notification ──────────────────────────────────────────────────────

def _get_admin_emails(db: Session) -> list[str]:
    """Return the distinct, non-empty `email` of every active account that
    belongs to a manager tenant."""
    from app.models.account import Account

    rows = (
        db.query(Account.email)
        .join(Tenant, Account.tenant_id == Tenant.id)
        .filter(
            Tenant.is_manager == True,  # noqa: E712 — SQLAlchemy column expression
            Account.is_active == True,  # noqa: E712
            Account.email != None,  # noqa: E711
            Account.email != "",
        )
        .all()
    )
    return list({r.email for r in rows})


def _send_failure_alert(
    tenant_code: str,
    tenant_name: str,
    domain: str,
    failed_items: list[str],
    admin_emails: list[str],
):
    """
    Send one alert email listing every failed check item to all administrators.

    Does nothing (besides logging) when *admin_emails* is empty. SMTP errors
    are logged and swallowed so a mail outage never aborts the tenant check.
    """
    if not admin_emails:
        logger.warning(f"No admin emails for failure alert on {tenant_code}")
        return

    try:
        item_lines = "\n".join(f" ✗ {item}" for item in failed_items)
        body = (
            f"【Virtual MIS 告警】租戶服務異常\n\n"
            f"租戶代碼 : {tenant_code}\n"
            f"租戶名稱 : {tenant_name}\n"
            f"網域 : {domain}\n"
            f"時間 : {now_tw().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
            f"異常項目:\n{item_lines}\n\n"
            f"請盡速登入 Virtual MIS 後台確認並處理。"
        )
        msg = MIMEText(body, "plain", "utf-8")
        msg["Subject"] = f"[VirtualMIS] 服務異常告警 — {tenant_code} ({domain})"
        msg["From"] = f"vmis-alert@{settings.MAIL_MX_HOST}"
        msg["To"] = ", ".join(admin_emails)

        with smtplib.SMTP(settings.MAIL_MX_HOST, 25, timeout=10) as smtp:
            smtp.sendmail(msg["From"], admin_emails, msg.as_string())

        logger.info(f"Failure alert sent to {admin_emails} for {tenant_code}: {failed_items}")
    except Exception as e:
        logger.error(f"Failed to send failure alert for {tenant_code}: {e}")


# ─── Main check ──────────────────────────────────────────────────────────────

def _collect_failed_items(result) -> list[str]:
    """Return the display labels of every check item whose result is exactly False."""
    labelled_states = [
        (result.traefik_status, "Traefik 路由"),
        (result.sso_result, "SSO (Keycloak Realm)"),
        (result.mailbox_result, "Mailbox Domain"),
        (result.nc_result, "Nextcloud 容器"),
        (result.office_result, "OnlyOffice 容器"),
    ]
    # `is False` on purpose: None means "not configured", not "failed".
    return [label for state, label in labelled_states if state is False]


def run_tenant_check(schedule_log_id: int, db: Session):
    """
    Run all checks for every active tenant and persist one
    TenantScheduleResult per tenant.

    Per tenant, in order:
      [1] Traefik route file
      [2] SSO (Keycloak realm + 'drive' client)
      [3] Mailbox domain
      [4] NC container, DB verification and OIDC setup  (non-manager tenants)
      [5] OO container                                  (non-manager tenants)
      [6] Quota usage (OO disk + PostgreSQL DB size)    (non-manager tenants)

    Missing resources are created/deployed automatically. When any item ends
    up False an alert email is sent to all administrators. Each tenant's
    result is committed individually.
    """
    from app.services.mail_client import MailClient
    from app.services.docker_client import DockerClient

    tenants = db.query(Tenant).filter(Tenant.is_active == True).all()  # noqa: E712
    mail = MailClient()
    docker = DockerClient()
    admin_emails = _get_admin_emails(db)

    for tenant in tenants:
        realm = tenant.keycloak_realm or tenant.code
        is_active = tenant.status == "active"
        nc_name = f"nc-{tenant.code}" if is_active else f"nc-{tenant.code}-test"
        oo_name = f"oo-{tenant.code}" if is_active else f"oo-{tenant.code}-test"
        kc_host = KC_HOST_ACTIVE if is_active else KC_HOST_TRIAL
        pg_host = PG_HOST_ACTIVE if is_active else PG_HOST_TRIAL
        pg_port = PG_PORT_ACTIVE if is_active else PG_PORT_TRIAL

        result = TenantScheduleResult(
            schedule_log_id=schedule_log_id,
            tenant_id=tenant.id,
            recorded_at=now_tw(),
        )
        fail_reasons = []

        # ── [1] Traefik route file ───────────────────────────────────────────
        # File provider: the scheduler writes {code}.yml into the dynamic dir.
        try:
            ok = _ensure_traefik_routes(tenant, is_active)
            result.traefik_status = ok
            if not ok:
                fail_reasons.append("traefik: route not loaded after write")
            result.traefik_done_at = now_tw()
        except Exception as e:
            result.traefik_status = False
            result.traefik_done_at = now_tw()
            fail_reasons.append(f"traefik: {e}")

        # ── [2] SSO (Keycloak realm + drive client) ──────────────────────────
        kc_drive_secret: Optional[str] = None
        try:
            sso_state = _check_kc_realm(kc_host, realm)
            if sso_state is None:
                # Realm missing → create the realm and the drive client.
                try:
                    _create_kc_realm(realm, tenant.name)
                    kc_drive_secret = _ensure_kc_drive_client(realm, tenant.domain)
                    result.sso_result = bool(kc_drive_secret)
                    if not kc_drive_secret:
                        fail_reasons.append("sso: realm created but drive client failed")
                except Exception as ce:
                    result.sso_result = False
                    fail_reasons.append(f"sso create: {ce}")
            elif sso_state is True:
                # Realm exists → ensure the drive client and the token lifespan.
                kc_drive_secret = _ensure_kc_drive_client(realm, tenant.domain)
                result.sso_result = True
                if not kc_drive_secret:
                    fail_reasons.append("sso: drive client missing/failed")
                    result.sso_result = False
                else:
                    try:
                        from app.services.keycloak_client import get_keycloak_client
                        get_keycloak_client().update_realm_token_settings(
                            realm, access_code_lifespan=600
                        )
                    except Exception as te:
                        # Best-effort sync: never fails the SSO check.
                        logger.warning(f"KC token settings update failed for {realm}: {te}")
            else:
                result.sso_result = False
                fail_reasons.append("sso: realm unreachable")
            result.sso_done_at = now_tw()
        except Exception as e:
            result.sso_result = False
            result.sso_done_at = now_tw()
            fail_reasons.append(f"sso: {e}")

        # ── [3] Mailbox domain ───────────────────────────────────────────────
        try:
            if mail.domain_exists(tenant.domain):
                result.mailbox_result = True
            else:
                # Domain not configured → create it.
                ok = mail.create_domain(tenant.domain)
                result.mailbox_result = bool(ok)
                if not ok:
                    fail_reasons.append("mailbox: create domain failed")
            result.mailbox_done_at = now_tw()
        except Exception as e:
            result.mailbox_result = False
            result.mailbox_done_at = now_tw()
            fail_reasons.append(f"mailbox: {e}")

        # ── [4] NC container + DB verification + OIDC setup ──────────────────
        pg_db = f"nc_{tenant.code}_db"
        if tenant.is_manager:
            # Manager tenants need no NC/OO containers: mark both as healthy.
            result.nc_result = True
            result.nc_done_at = now_tw()
            result.office_result = True
            result.office_done_at = now_tw()
        else:
            try:
                nc_state = docker.check_container_ssh(nc_name)
                if nc_state is None:
                    # Container missing → ensure compose file + DB, then deploy.
                    logger.info(f"NC {nc_name}: not found, ensuring compose/DB and deploying")
                    _ensure_tenant_compose(tenant, is_active)
                    _ensure_nc_db(pg_host, pg_db, pg_port)
                    ok = docker.ssh_compose_up(tenant.code)
                    result.nc_result = bool(ok)
                    if not ok:
                        fail_reasons.append("nc: deploy failed")
                    else:
                        # After a successful deploy verify NC really uses PostgreSQL.
                        if not _nc_db_check(nc_name, pg_host, pg_db, tenant.domain, pg_port):
                            result.nc_result = False
                            fail_reasons.append("nc: installed but not using pgsql")
                elif nc_state is False:
                    # Container exists but is stopped → start it.
                    logger.info(f"NC {nc_name}: stopped, restarting")
                    ok = docker.ssh_compose_up(tenant.code)
                    result.nc_result = bool(ok)
                    if not ok:
                        fail_reasons.append("nc: start failed")
                else:
                    # Container running → verify the DB type (guards against
                    # leftover sqlite3 installations).
                    db_ok = _nc_db_check(nc_name, pg_host, pg_db, tenant.domain, pg_port)
                    if not db_ok:
                        result.nc_result = False
                        fail_reasons.append("nc: DB check failed (possible sqlite3 issue)")
                    else:
                        result.nc_result = True
                        if kc_drive_secret:
                            if not _nc_initialized(nc_name):
                                # First-time init: language + apps + OIDC +
                                # forced SSO + OO integration.
                                oo_url = (
                                    f"https://office-{tenant.code}.ease.taipei"
                                    if is_active
                                    else f"https://office-{tenant.code}.lab.taipei"
                                )
                                ok = _nc_initialize(
                                    nc_name, kc_host, realm, kc_drive_secret,
                                    tenant.domain, oo_name, oo_url, is_active,
                                )
                                if not ok:
                                    fail_reasons.append("nc: initialization failed")
                            else:
                                # Already initialized: only sync the OIDC secret
                                # so NC stays consistent with Keycloak.
                                ok = _configure_nc_oidc(
                                    nc_name, kc_host, realm, kc_drive_secret, tenant.domain
                                )
                                if not ok:
                                    fail_reasons.append("nc: OIDC sync failed")
                result.nc_done_at = now_tw()
            except Exception as e:
                result.nc_result = False
                result.nc_done_at = now_tw()
                fail_reasons.append(f"nc: {e}")

            # ── [5] OO container ─────────────────────────────────────────────
            try:
                oo_state = docker.check_container_ssh(oo_name)
                if oo_state is None:
                    ok = docker.ssh_compose_up(tenant.code)
                    result.office_result = bool(ok)
                    if not ok:
                        fail_reasons.append("oo: deploy failed")
                elif oo_state is False:
                    ok = docker.ssh_compose_up(tenant.code)
                    result.office_result = bool(ok)
                    if not ok:
                        fail_reasons.append("oo: start failed")
                else:
                    result.office_result = True
                result.office_done_at = now_tw()
            except Exception as e:
                result.office_result = False
                result.office_done_at = now_tw()
                fail_reasons.append(f"oo: {e}")

        # ── [6] Quota (OO disk + PG DB size) ─────────────────────────────────
        if not tenant.is_manager:
            try:
                oo_gb = docker.get_oo_disk_usage_gb(oo_name) or 0.0
                pg_gb = _get_pg_db_size_gb(pg_host, pg_db, pg_port) or 0.0
                result.quota_usage = round(oo_gb + pg_gb, 3)
            except Exception as e:
                logger.warning(f"Quota check failed for {tenant.code}: {e}")

        if fail_reasons:
            result.fail_reason = "; ".join(fail_reasons)

        # Any item that is False → one combined alert to all administrators.
        failed_items = _collect_failed_items(result)
        if failed_items:
            _send_failure_alert(
                tenant.code, tenant.name, tenant.domain, failed_items, admin_emails
            )

        db.add(result)
        db.commit()

    logger.info(f"Tenant check done: {len(tenants)} tenants processed")