# hyungi_document_server/app/core/config.py

"""설정 로딩 — config.yaml + credentials.env"""

import os
from pathlib import Path

import yaml
from pydantic import BaseModel


class UploadConfig(BaseModel):
    """Upload size/streaming limits and orphan-cleanup thresholds."""

    max_bytes: int = 100_000_000
    content_length_slack_ratio: float = 1.05
    stream_chunk_bytes: int = 1_048_576
    # Orphan cleanup (`*.uploading` — leftovers after a crash/abort).
    orphan_max_age_sec: int = 3600
    cleanup_warn_threshold: int = 10


class AIModelConfig(BaseModel):
    """Endpoint, limits and sampling options for a single AI model slot."""

    endpoint: str
    model: str
    max_tokens: int = 4096
    timeout: int = 60
    daily_budget_usd: float | None = None
    require_explicit_trigger: bool = False
    # B-0: practical context ceiling (in chars) granted to the 4B/26B models. triage=120k, primary=260k.
    # classify_worker consults it when deciding on escalation. 0/None = ceiling ignored.
    context_char_limit: int | None = None
    # P1 of family-adaptive-bengio (2026-05-23): config-driven sampling profile.
    # None = MLX/OpenAI server default. Not applied on the Anthropic branch (separate plan scope).
    temperature: float | None = None
    top_p: float | None = None
    # mlx-native sampling — suppresses code-switching (CJK/Latin leakage) and repetition loops
    # in long Korean text.
    # Qwen3 recommendation: top_k=20, repetition_penalty 1.05~1.1. None = server default (not injected).
    # Applied only on the OpenAI-compatible (mlx) branch — not on the Anthropic branch (separate scope).
    repetition_penalty: float | None = None
    top_k: int | None = None
    # Two-node migration (2026-07-02): rerank backend protocol discriminator.
    # "tei" = TEI POST /rerank {"query","texts"} → [{"index","score"}] (default, no regression)
    # "llamacpp" = llama.cpp POST /v1/rerank {"model","query","documents"}
    #              → {"results":[{"index","relevance_score"}]} (Mac mini :8807)
    # Unsupported value = client.rerank raises ValueError (no silent fallback). Ignored outside the rerank block.
    protocol: str = "tei"


class DeepSummaryBacklogConfig(BaseModel):
    """B-1 R2 — thresholds that curb deep_summary enqueue bursts."""
    ratio_threshold: float = 0.3     # deep_n/classify_n over the last window
    pending_threshold: int = 5       # deep_summary pending+processing
    window_minutes: int = 30


class SearchAskBackendConfig(BaseModel):
    """PR-2 of DS AI routing policy ([[document-server-ai-routing-policy]], 2026-05-23):
    the /api/search/ask backend dispatcher goes exclusively through llm-router :8890.

    - backend unspecified / "gemma-macmini" / "mac-mini-default" → router uses tier_b
    - backend "qwen-macbook" → router uses the named upstream (M5 Max)
    - backend "claude-cloud" → router returns an explicit 503 (scaffold)
    - backend "auto" → router's rule + LLM triage

    Unavailable → BackendUnavailable → explicit 503 (zero silent fallback).
    Rollback: DS_BACKENDS_VIA_ROUTER=false switches to the legacy direct-call path.
    The legacy macmini_url / macbook_url / macbook_model are used only on that fallback.
    """

    # New in PR-2: llm-router URL. If empty, env LLM_ROUTER_URL or a hardcoded default is used.
    router_url: str = ""
    # Legacy fields (used only when DS_BACKENDS_VIA_ROUTER=false)
    macmini_url: str = "http://100.76.254.116:8801"
    macbook_url: str = "http://100.118.112.84:8810"
    macbook_model: str = "mlx-community/Qwen3.6-27B-8bit"
    timeout_connect_s: int = 5
    timeout_read_s: int = 60


class SearchAskReactConfig(BaseModel):
    """PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react ReAct loop.

    qwen-macbook only (the endpoint itself is an implicit opt-in). G0-2 counter semantics:
    max_tool_rounds=2 → at most 3 LLM calls (2 tool rounds + 1 final), at most 2 search executions.
    """

    enabled: bool = True
    max_tool_rounds: int = 2
    search_tool_limit: int = 5
    search_tool_mode: str = "hybrid"


class SearchAskConfig(BaseModel):
    """Groups the `backend` and `react` sub-sections of the search `ask` settings."""

    backend: SearchAskBackendConfig = SearchAskBackendConfig()
    react: SearchAskReactConfig = SearchAskReactConfig()


class SearchConfig(BaseModel):
    """Top-level search settings; currently holds only the `ask` section."""

    ask: SearchAskConfig = SearchAskConfig()


class AIConfig(BaseModel):
    """Gateway endpoint plus one AIModelConfig per model slot (required and optional)."""

    gateway_endpoint: str
    # B-0: 3-tier routing. triage/primary = Mac mini 26B MLX (PR #20 endpoint unification). fallback = Claude Sonnet 4 API.
    triage: AIModelConfig
    primary: AIModelConfig
    fallback: AIModelConfig
    premium: AIModelConfig
    embedding: AIModelConfig
    rerank: AIModelConfig
    # Phase 3.5a: answerability classifier (optional — score-only gate when absent). After PR #20: Mac mini 26B MLX endpoint (initial = exaone3.5).
    classifier: AIModelConfig | None = None
    # Phase 3.5b: semantic verifier (optional — grounding-only when absent). After PR #20: Mac mini 26B MLX endpoint (initial = exaone3.5).
    verifier: AIModelConfig | None = None
    # ds-macbook-offload-1: dedicated deep slot (optional). MacBook M5 Max Qwen3.6-27B — via llm-router :8890
    # (model=qwen-macbook alias, reuses the wake preflight). When absent, deep_summary keeps the existing
    # primary (Mac mini 26B) path = feature inactive. Explicit opt-in — no silent fallback.
    deep: AIModelConfig | None = None
    # Legacy: vision slot (currently zero usages — Document Server uses separate OCR/STT services).
    # Removal is in progress, so it stays optional to keep loading lenient.
    vision: AIModelConfig | None = None
    # B-1 R2: backlog guard thresholds
    deep_summary_backlog: DeepSummaryBacklogConfig = DeepSummaryBacklogConfig()


class Settings(BaseModel):
    """Application-wide settings; populated by load_settings() from config.yaml and env vars."""

    # DB
    database_url: str = ""

    # AI
    ai: AIConfig | None = None

    # PR-MacBook-RAG-Backend-1: /api/search/ask backend dispatcher
    search: SearchConfig = SearchConfig()

    # NAS
    nas_mount_path: str = "/documents"
    nas_pkm_root: str = "/documents/PKM"

    # Authentication
    jwt_secret: str = ""
    totp_secret: str = ""

    # Phase 3.5: eval runner shared secret — validates trust in the X-Source=eval / X-Eval-Case-Id headers.
    # If empty, all eval headers are rejected (absent = disabled).
    eval_runner_token: str = ""

    # kordoc
    kordoc_endpoint: str = "http://kordoc-service:3100"

    # OCR (Surya)
    ocr_endpoint: str = "http://ocr-service:3200"

    # STT (faster-whisper, §3)
    stt_endpoint: str = "http://stt-service:3300"

    # Two-node migration (2026-07-02): explicit gates for the retirement of the GPU CUDA services
    # (Surya OCR / faster-whisper).
    # false = that path is explicitly disabled — for OCR, _call_ocr logs a warning and returns None
    # (existing soft-fail semantics); STT is a terminal skip recorded in extract_meta.
    # Not a silent low-quality fallback (visible via logs/metadata).
    ocr_enabled: bool = True
    stt_enabled: bool = True

    # §3 file_watcher: Roon music library path (skipped by prefix match).
    # Empty string = nothing is skipped. E.g. "/documents/PKM/../Music/roon-library" or
    # a Roon library mounted separately over NFS.
    roon_library_path: str = ""

    # Extra scan paths for externally authored markdown material such as KGS Code
    # (PKM-relative paths, comma-separated).
    # env: ADDITIONAL_WATCH_TARGETS=Knowledge/Industrial_Safety/가스기사/KGS_Code,...
    # All are handled as expected_category="library" (only document extensions such as md/pdf/docx are accepted).
    # Only additions on top of the default Inbox/Recordings/Videos scan are allowed.
    additional_watch_targets: list[str] = []

    # Classification taxonomy
    taxonomy: dict = {}
    document_types: list[str] = []

    # Upload limits (authoritative policy)
    upload: UploadConfig = UploadConfig()

    # Generative-LLM hold (2026-06-11): consumers/workers whose name is listed in config.yaml
    # pipeline.held_stages do not claim at all (no attempts consumed; a pending backlog is intended).
    # Valid keys = queue stage names (classify/summarize/deep_summary) + cron/consumer keys (digest,
    # briefing, study_explanation, study_session_analysis, study_memo_card).
    # Empty list = no-op (existing behavior unchanged).
    pipeline_held_stages: list[str] = []

    # mlx gate concurrency cap (2026-06-12, config.yaml pipeline.mlx_gate_concurrency).
    # 1 = former single-inference behavior. 2 = takes advantage of continuous batching (see the llm_gate docstring).
    mlx_gate_concurrency: int = 1

    # LLM call parameters for digest/briefing generation (2026-06-15, timeouts single-sourced after the model swap).
    # The former hard-coded 25s (tuned for fast Gemma) was missed in the sweep for the switch to
    # Qwen3.6-27B-6bit (~90~300s per call), causing digest to exceed its 600s hard cap and briefing to
    # fall back 4/4 → moved to config as the single source.
    # Concurrency is not a separate key — the global mlx_gate_concurrency (single gate budget) governs it.
    digest_llm_timeout_s: int = 200
    digest_llm_attempts: int = 2
    digest_pipeline_hard_cap_s: int = 1800
    # 2026-06-20: timeout for a single study/analyze primary call (the former hard-coded 30~60s was tuned
    # for fast Gemma and missed in the Qwen 27B swap sweep → user-facing 504s + permanently stuck workers).
    # Single-sourced the same way as digest.
    llm_call_timeout_s: int = 200

    # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
    # The GPU side sets this to false (.env), which triggers the skip guard on the explanation branch.
    study_explanation_enabled: bool = True
    # Study memorization notes Phase 1: gate for the card_extract poller/consumer. Set to false when the owner is split off.
    study_card_extract_enabled: bool = True
    # Publish layer (docsrv-viewer-publish): gate for the publish_outbox worker. Set to true once the author/4-A enqueue wiring (P0-1b) is done.
    study_publish_enabled: bool = False
    digest_publish_enabled: bool = False  # docsrv-viewer-publish P1-1 (news/digest publish feed gate)
    maintenance_mode: bool = False  # P1-4: during maintenance/experiments = processing-status banner (surface != data)
    maintenance_note: str = ""
    # Gate for viewer write-back ingest (study-to-viewer P2). Enables /ingest/study/attempts. Default false = inert (503).
    study_ingest_enabled: bool = False

    # Bearer token for internal endpoints (used for Mac mini derived-worker calls)
    internal_worker_token: str = ""

    # Bearer token for the viewer↔DS publish channel (publish read API P0-2 + ingest P2). Kept separate from the Mac mini token (blast-radius isolation).
    viewer_sync_token: str = ""


_TRUTHY_ENV_VALUES = ("1", "true", "yes")


def _env_flag(name: str, default: bool) -> bool:
    """Read a boolean environment variable.

    A set variable is true only when its lower-cased value is "1", "true" or
    "yes"; any other value (including "") is false. ``default`` is returned
    only when the variable is not set at all.
    """
    value = os.getenv(name)
    if value is None:
        return default
    return value.lower() in _TRUTHY_ENV_VALUES


def _bounded_int(section: dict, key: str, default: int, minimum: int) -> int:
    """Return ``section[key]`` as an int clamped to at least ``minimum``.

    Falls back to ``default`` when the value cannot be converted to an int
    (None, non-numeric string, infinity, ...). A missing key yields
    ``max(minimum, default)``.
    """
    try:
        return max(minimum, int(section.get(key, default)))
    except (TypeError, ValueError, OverflowError):
        return default


def _resolve_config_path() -> Path:
    """Pick /app/config.yaml (inside the Docker container) or the project-root config.yaml."""
    container_path = Path("/app/config.yaml")
    if container_path.exists():
        return container_path
    return Path(__file__).parent.parent.parent / "config.yaml"


def _read_config_yaml(config_path: Path) -> dict:
    """Parse ``config_path`` into a dict.

    Returns an empty dict when the file does not exist or is empty (an empty
    YAML document parses to None). Raises ValueError when the top-level YAML
    node is not a mapping.
    """
    if not config_path.exists():
        return {}
    # Explicit UTF-8: the file may contain non-ASCII text and must not depend on the locale.
    with open(config_path, encoding="utf-8") as f:
        loaded = yaml.safe_load(f) or {}
    if not isinstance(loaded, dict):
        raise ValueError("config.yaml: top-level content must be a mapping")
    return loaded


def _build_ai_config(ai_raw: dict | None) -> "AIConfig":
    """Build AIConfig from the ``ai`` section of config.yaml.

    Required slots (primary, fallback, premium, embedding, rerank) raise
    KeyError when missing; optional slots (vision, classifier, verifier, deep)
    become None when absent. Raises ValueError when neither ``triage`` nor
    ``fallback`` is configured.
    """
    ai_raw = ai_raw or {}
    models = ai_raw.get("models") or {}

    # B-0: triage is a newer slot that older config.yaml files may lack; for
    # backward compatibility the fallback model is reused as triage.
    triage_raw = models.get("triage") or models.get("fallback")
    if triage_raw is None:
        raise ValueError("config.yaml: ai.models.triage (or fallback) required")

    def optional_slot(slot: str) -> "AIModelConfig | None":
        return AIModelConfig(**models[slot]) if slot in models else None

    return AIConfig(
        gateway_endpoint=(ai_raw.get("gateway") or {}).get("endpoint", ""),
        triage=AIModelConfig(**triage_raw),
        primary=AIModelConfig(**models["primary"]),
        fallback=AIModelConfig(**models["fallback"]),
        premium=AIModelConfig(**models["premium"]),
        embedding=AIModelConfig(**models["embedding"]),
        rerank=AIModelConfig(**models["rerank"]),
        vision=optional_slot("vision"),
        classifier=optional_slot("classifier"),
        verifier=optional_slot("verifier"),
        deep=optional_slot("deep"),
        deep_summary_backlog=DeepSummaryBacklogConfig(
            **(ai_raw.get("deep_summary_backlog") or {})
        ),
    )


def load_settings() -> Settings:
    """Load settings from environment variables and config.yaml.

    Environment variables (injected by docker-compose) supply secrets, service
    endpoints and feature gates. config.yaml supplies the ai / nas / search /
    pipeline / taxonomy / document_types / upload sections. A missing or empty
    config.yaml, or a section that is absent or null, yields that section's
    defaults.

    Raises:
        ValueError: config.yaml's top level is not a mapping, or an ``ai``
            section is present without a triage/fallback model.
        KeyError: an ``ai`` section is present but a required model slot is missing.
    """
    # ADDITIONAL_WATCH_TARGETS — comma-separated; surrounding whitespace and empty items dropped.
    awt_raw = os.getenv("ADDITIONAL_WATCH_TARGETS", "")
    additional_watch_targets = [p.strip() for p in awt_raw.split(",") if p.strip()]

    raw = _read_config_yaml(_resolve_config_path())

    # A present `ai` key is validated strictly; only a fully absent key means "no AI config".
    ai_config = _build_ai_config(raw["ai"]) if "ai" in raw else None

    nas_raw = raw.get("nas") or {}

    ask_raw = (raw.get("search") or {}).get("ask") or {}
    search_cfg = SearchConfig(
        ask=SearchAskConfig(
            backend=SearchAskBackendConfig(**(ask_raw.get("backend") or {})),
            react=SearchAskReactConfig(**(ask_raw.get("react") or {})),
        )
    )

    pipeline_raw = raw.get("pipeline") or {}
    held_raw = pipeline_raw.get("held_stages") or []
    # A scalar (string) written by mistake must not be split into characters —
    # accept it as a single-item list.
    if not isinstance(held_raw, (list, tuple)):
        held_raw = [held_raw]

    return Settings(
        database_url=os.getenv("DATABASE_URL", ""),
        ai=ai_config,
        search=search_cfg,
        nas_mount_path=nas_raw.get("mount_path", "/documents"),
        nas_pkm_root=nas_raw.get("pkm_root", "/documents/PKM"),
        jwt_secret=os.getenv("JWT_SECRET", ""),
        totp_secret=os.getenv("TOTP_SECRET", ""),
        eval_runner_token=os.getenv("EVAL_RUNNER_TOKEN", ""),
        kordoc_endpoint=os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100"),
        ocr_endpoint=os.getenv("OCR_ENDPOINT", "http://ocr-service:3200"),
        stt_endpoint=os.getenv("STT_ENDPOINT", "http://stt-service:3300"),
        ocr_enabled=_env_flag("OCR_ENABLED", True),
        stt_enabled=_env_flag("STT_ENABLED", True),
        roon_library_path=os.getenv("ROON_LIBRARY_PATH", ""),
        additional_watch_targets=additional_watch_targets,
        taxonomy=raw.get("taxonomy") or {},
        document_types=raw.get("document_types") or [],
        upload=UploadConfig(**(raw.get("upload") or {})),
        study_explanation_enabled=_env_flag("STUDY_EXPLANATION_ENABLED", True),
        study_card_extract_enabled=_env_flag("STUDY_CARD_EXTRACT_ENABLED", True),
        study_publish_enabled=_env_flag("STUDY_PUBLISH_ENABLED", False),
        digest_publish_enabled=_env_flag("DIGEST_PUBLISH_ENABLED", False),
        maintenance_mode=_env_flag("MAINTENANCE_MODE", False),
        maintenance_note=os.getenv("MAINTENANCE_NOTE", ""),
        study_ingest_enabled=_env_flag("STUDY_INGEST_ENABLED", False),
        internal_worker_token=os.getenv("INTERNAL_WORKER_TOKEN", ""),
        viewer_sync_token=os.getenv("VIEWER_SYNC_TOKEN", ""),
        pipeline_held_stages=[str(s) for s in held_raw],
        mlx_gate_concurrency=_bounded_int(pipeline_raw, "mlx_gate_concurrency", 1, 1),
        digest_llm_timeout_s=_bounded_int(pipeline_raw, "digest_llm_timeout_s", 200, 1),
        digest_llm_attempts=_bounded_int(pipeline_raw, "digest_llm_attempts", 2, 1),
        digest_pipeline_hard_cap_s=_bounded_int(
            pipeline_raw, "digest_pipeline_hard_cap_s", 1800, 60
        ),
        llm_call_timeout_s=_bounded_int(pipeline_raw, "llm_call_timeout_s", 200, 1),
    )


# Module-level settings object: load_settings() runs once, when this module is imported.
settings = load_settings()