"""설정 로딩 — config.yaml + credentials.env""" import os from pathlib import Path import yaml from pydantic import BaseModel class UploadConfig(BaseModel): max_bytes: int = 100_000_000 content_length_slack_ratio: float = 1.05 stream_chunk_bytes: int = 1_048_576 # orphan cleanup (`*.uploading` — 크래시/abort 후 잔존물) orphan_max_age_sec: int = 3600 cleanup_warn_threshold: int = 10 class AIModelConfig(BaseModel): endpoint: str model: str max_tokens: int = 4096 timeout: int = 60 daily_budget_usd: float | None = None require_explicit_trigger: bool = False # B-0: 4B/26B 에 부여한 실사용 컨텍스트 상한 (char). triage=120k, primary=260k. # classify_worker 가 에스컬레이션 판정 시 참고. 0/None 이면 상한 무시. context_char_limit: int | None = None # P1 of family-adaptive-bengio (2026-05-23): config-driven sampling profile. # None = MLX/OpenAI server default. Anthropic branch 는 미적용 (별 plan 범위). temperature: float | None = None top_p: float | None = None class DeepSummaryBacklogConfig(BaseModel): """B-1 R2 — deep_summary enqueue 폭발 억제 임계치.""" ratio_threshold: float = 0.3 # 지난 window 의 deep_n/classify_n pending_threshold: int = 5 # deep_summary pending+processing window_minutes: int = 30 class SearchAskBackendConfig(BaseModel): """PR-2 of DS AI routing policy ([[document-server-ai-routing-policy]], 2026-05-23): /api/search/ask backend dispatcher 가 llm-router :8890 단일 경유. - backend 미지정 / "gemma-macmini" / "mac-mini-default" → router 가 tier_b - backend "qwen-macbook" → router 가 named upstream (M5 Max) - backend "claude-cloud" → router 가 503 명시 (scaffold) - backend "auto" → router 의 rule + LLM triage Unavailable → BackendUnavailable → 503 명시 (silent fallback 0). Rollback: DS_BACKENDS_VIA_ROUTER=false 로 legacy 직접 호출 path. legacy macmini_url / macbook_url / macbook_model 은 fallback 시만 사용. """ # PR-2 신규: llm-router URL. 비면 env LLM_ROUTER_URL 또는 hardcoded default. router_url: str = "" # Legacy fields (DS_BACKENDS_VIA_ROUTER=false 시만 사용) macmini_url: str = "http://100.76.254.116:8801" macbook_url: str = "http://100.118.112.84:8810" macbook_model: str = "mlx-community/Qwen3.6-27B-8bit" timeout_connect_s: int = 5 timeout_read_s: int = 60 class SearchAskReactConfig(BaseModel): """PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react ReAct loop. qwen-macbook only (endpoint 자체가 implicit opt-in). G0-2 counter semantics: max_tool_rounds=2 → LLM 호출 최대 3회 (tool round 2 + final 1), search 실행 최대 2회. """ enabled: bool = True max_tool_rounds: int = 2 search_tool_limit: int = 5 search_tool_mode: str = "hybrid" class SearchAskConfig(BaseModel): backend: SearchAskBackendConfig = SearchAskBackendConfig() react: SearchAskReactConfig = SearchAskReactConfig() class SearchConfig(BaseModel): ask: SearchAskConfig = SearchAskConfig() class AIConfig(BaseModel): gateway_endpoint: str # B-0: 3-tier routing. triage/primary = Mac mini 26B MLX (PR #20 endpoint 통합). fallback = Claude Sonnet 4 API. triage: AIModelConfig primary: AIModelConfig fallback: AIModelConfig premium: AIModelConfig embedding: AIModelConfig rerank: AIModelConfig # Phase 3.5a: answerability classifier (optional — 없으면 score-only gate). PR #20 이후 Mac mini 26B MLX endpoint (initial = exaone3.5). classifier: AIModelConfig | None = None # Phase 3.5b: semantic verifier (optional — 없으면 grounding-only). PR #20 이후 Mac mini 26B MLX endpoint (initial = exaone3.5). verifier: AIModelConfig | None = None # ds-macbook-offload-1: 심층 전용 슬롯 (optional). 맥북 M5 Max Qwen3.6-27B — llm-router :8890 # 경유(model=qwen-macbook alias, wake preflight 재사용). 부재 시 deep_summary 는 기존 # primary(맥미니 26B) 경로 그대로 = 기능 미활성. 명시 opt-in — silent fallback 없음. deep: AIModelConfig | None = None # Legacy: vision 슬롯 (현재 사용처 0 — Document Server 는 OCR/STT 별도 서비스). # 제거 진행 중이므로 optional 로 관대한 로딩 유지. vision: AIModelConfig | None = None # B-1 R2: backlog guard 임계치 deep_summary_backlog: DeepSummaryBacklogConfig = DeepSummaryBacklogConfig() class Settings(BaseModel): # DB database_url: str = "" # AI ai: AIConfig | None = None # PR-MacBook-RAG-Backend-1: /api/search/ask backend dispatcher search: SearchConfig = SearchConfig() # NAS nas_mount_path: str = "/documents" nas_pkm_root: str = "/documents/PKM" # 인증 jwt_secret: str = "" totp_secret: str = "" # Phase 3.5: eval runner shared secret — X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증. # 비어있으면 모든 eval 헤더 거부 (부재 = 비활성). eval_runner_token: str = "" # kordoc kordoc_endpoint: str = "http://kordoc-service:3100" # OCR (Surya) ocr_endpoint: str = "http://ocr-service:3200" # STT (faster-whisper, §3) stt_endpoint: str = "http://stt-service:3300" # §3 file_watcher: Roon 음원 경로 (prefix match 로 skip). # 빈 문자열이면 skip 없음. 예: "/documents/PKM/../Music/roon-library" 또는 # NFS 경유 별도 마운트된 Roon 라이브러리. roon_library_path: str = "" # KGS Code 등 외부 작성 마크다운 자료 추가 스캔 경로 (PKM 상대 경로, 쉼표 구분). # env: ADDITIONAL_WATCH_TARGETS=Knowledge/Industrial_Safety/가스기사/KGS_Code,... # 모두 expected_category="library" 로 처리 (md/pdf/docx 등 문서 확장자만 수락). # Inbox/Recordings/Videos 기본 스캔 외에 추가만 허용. additional_watch_targets: list[str] = [] # 분류 체계 taxonomy: dict = {} document_types: list[str] = [] # 업로드 한도 (authoritative policy) upload: UploadConfig = UploadConfig() # 생성 LLM 홀드 (2026-06-11): config.yaml pipeline.held_stages 에 든 이름의 # 컨슈머/워커는 claim 자체를 하지 않는다 (attempts 미소모, pending 적체 = 의도). # 유효 키 = 큐 stage 명(classify/summarize/deep_summary) + cron/컨슈머 키(digest, # briefing, study_explanation, study_session_analysis, study_memo_card). # 빈 리스트 = 무동작 (기존 동작 그대로). pipeline_held_stages: list[str] = [] # mlx gate 동시 실행 상한 (2026-06-12, config.yaml pipeline.mlx_gate_concurrency). # 1 = 구 single-inference 동작. 2 = continuous batching 활용 (llm_gate docstring 참조). mlx_gate_concurrency: int = 1 # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거. study_explanation_enabled: bool = True # 공부 암기노트 Phase 1: card_extract 폴러/consumer 게이트. owner 분리 시 false 로. study_card_extract_enabled: bool = True # internal endpoint Bearer token (Mac mini derived-worker 호출용) internal_worker_token: str = "" def load_settings() -> Settings: """config.yaml + 환경변수에서 설정 로딩""" # 환경변수 (docker-compose에서 주입) database_url = os.getenv("DATABASE_URL", "") study_explanation_enabled = os.getenv("STUDY_EXPLANATION_ENABLED", "true").lower() in ("1", "true", "yes") study_card_extract_enabled = os.getenv("STUDY_CARD_EXTRACT_ENABLED", "true").lower() in ("1", "true", "yes") internal_worker_token = os.getenv("INTERNAL_WORKER_TOKEN", "") jwt_secret = os.getenv("JWT_SECRET", "") totp_secret = os.getenv("TOTP_SECRET", "") eval_runner_token = os.getenv("EVAL_RUNNER_TOKEN", "") kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100") ocr_endpoint = os.getenv("OCR_ENDPOINT", "http://ocr-service:3200") stt_endpoint = os.getenv("STT_ENDPOINT", "http://stt-service:3300") roon_library_path = os.getenv("ROON_LIBRARY_PATH", "") # ADDITIONAL_WATCH_TARGETS — 쉼표 구분 (공백 제거) awt_raw = os.getenv("ADDITIONAL_WATCH_TARGETS", "") additional_watch_targets = [p.strip() for p in awt_raw.split(",") if p.strip()] # config.yaml — Docker 컨테이너 내부(/app/config.yaml) 또는 프로젝트 루트 config_path = Path("/app/config.yaml") if not config_path.exists(): config_path = Path(__file__).parent.parent.parent / "config.yaml" ai_config = None nas_mount = "/documents" nas_pkm = "/documents/PKM" if config_path.exists(): with open(config_path) as f: raw = yaml.safe_load(f) if "ai" in raw: ai_raw = raw["ai"] models = ai_raw.get("models", {}) # B-0: triage 는 config.yaml 에 없을 수도 있는 신규 슬롯. 구버전 호환을 위해 # 없으면 fallback 를 triage 로 대체 (동일 모델 재사용). triage_raw = models.get("triage") or models.get("fallback") if triage_raw is None: raise ValueError("config.yaml: ai.models.triage (or fallback) required") ai_config = AIConfig( gateway_endpoint=ai_raw.get("gateway", {}).get("endpoint", ""), triage=AIModelConfig(**triage_raw), primary=AIModelConfig(**models["primary"]), fallback=AIModelConfig(**models["fallback"]), premium=AIModelConfig(**models["premium"]), embedding=AIModelConfig(**models["embedding"]), rerank=AIModelConfig(**models["rerank"]), vision=(AIModelConfig(**models["vision"]) if "vision" in models else None), classifier=( AIModelConfig(**models["classifier"]) if "classifier" in models else None ), verifier=( AIModelConfig(**models["verifier"]) if "verifier" in models else None ), deep=(AIModelConfig(**models["deep"]) if "deep" in models else None), deep_summary_backlog=DeepSummaryBacklogConfig( **ai_raw.get("deep_summary_backlog", {}) ), ) if "nas" in raw: nas_mount = raw["nas"].get("mount_path", nas_mount) nas_pkm = raw["nas"].get("pkm_root", nas_pkm) search_cfg = SearchConfig() if config_path.exists() and raw and "search" in raw: ask_raw = (raw.get("search") or {}).get("ask", {}) or {} sb = ask_raw.get("backend", {}) or {} sr = ask_raw.get("react", {}) or {} search_cfg = SearchConfig( ask=SearchAskConfig( backend=SearchAskBackendConfig(**sb), react=SearchAskReactConfig(**sr), ) ) pipeline_held_stages: list[str] = [] mlx_gate_concurrency = 1 if config_path.exists() and raw and "pipeline" in raw: held_raw = (raw.get("pipeline") or {}).get("held_stages") or [] # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용. if not isinstance(held_raw, (list, tuple)): held_raw = [held_raw] pipeline_held_stages = [str(s) for s in held_raw] try: mlx_gate_concurrency = max( 1, int((raw.get("pipeline") or {}).get("mlx_gate_concurrency", 1)) ) except (TypeError, ValueError): mlx_gate_concurrency = 1 taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {} document_types = raw.get("document_types", []) if config_path.exists() and raw else [] upload_cfg = ( UploadConfig(**raw["upload"]) if config_path.exists() and raw and "upload" in raw else UploadConfig() ) return Settings( database_url=database_url, ai=ai_config, search=search_cfg, nas_mount_path=nas_mount, nas_pkm_root=nas_pkm, jwt_secret=jwt_secret, totp_secret=totp_secret, eval_runner_token=eval_runner_token, kordoc_endpoint=kordoc_endpoint, ocr_endpoint=ocr_endpoint, stt_endpoint=stt_endpoint, roon_library_path=roon_library_path, additional_watch_targets=additional_watch_targets, taxonomy=taxonomy, document_types=document_types, upload=upload_cfg, study_explanation_enabled=study_explanation_enabled, study_card_extract_enabled=study_card_extract_enabled, internal_worker_token=internal_worker_token, pipeline_held_stages=pipeline_held_stages, mlx_gate_concurrency=mlx_gate_concurrency, ) settings = load_settings()