Files
hyungi_document_server/app/core/config.py
T
hyungi bcf644f893 refactor(search): /api/search/ask dispatcher route via llm-router
PR-2 of DS AI routing policy (2026-05-23, see plan
~/.claude/plans/document-server-ai-cheeky-reddy.md +
memory project_document_server_ai_routing_policy).

DS 의 모든 backend 호출이 llm-router :8890 단일 경유. 정칙 정합:
- 신규 RouterBackend (services/llm/backends.py) — alias 별 router POST
  + requires_gate 분기 (mac-mini-default 만 llm_gate FOREGROUND 보호).
- 기존 GemmaMacMiniBackend + QwenMacBookBackend = legacy 보존
  (DS_BACKENDS_VIA_ROUTER=false rollback safety only). 1주 후 별
  cleanup PR (PR-DS-Backends-Legacy-Cleanup-1) 로 폐기.
- get_backend factory dual-path (env flag) — backward-compat
  (gemma-macmini alias → mac-mini-default 매핑).
- search.py:457 Query pattern 확장: mac-mini-default|claude-cloud|auto
  추가. /ask/react 의 isinstance(QwenMacBookBackend) → hasattr
  duck-typing (RouterBackend + Legacy 모두 generate_with_tools 구현).
- SearchAskBackendConfig 에 router_url 신규 (env LLM_ROUTER_URL 또는
  hardcoded MVP default http://100.76.254.116:8890).
- docker-compose.yml fastapi env 에 LLM_ROUTER_URL +
  DS_BACKENDS_VIA_ROUTER 추가.

AIClient (_call_chat, call_triage, call_primary, call_fallback) 경유
path 는 별 PR (PR-AIClient-Router-Migration-1) — MVP scope C 채택,
회귀 risk 최소화.

Closure (즉시 fixture/matrix):
- factory smoke 6 alias (None/mac-mini-default/gemma-macmini/
  qwen-macbook/claude-cloud/auto) + 1 invalid (nonsense → ValueError).
- live 3 case: mac-mini-default 200 \"pong! 🏓\" + qwen-macbook cold
  502 upstream_502_primary=ConnectError + claude-cloud 503
  provider_not_configured.
- silent fallback 0 + direct M5/Mac mini socket 0
  (RouterBackend 만 router 호출).

Backup: ~/.local/share/ds-routing-pr2-backups/20260523/
(backends.py + config.py + search.py + docker-compose.yml).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 03:41:29 +00:00

266 lines
10 KiB
Python

"""설정 로딩 — config.yaml + credentials.env"""
import os
from pathlib import Path
import yaml
from pydantic import BaseModel
class UploadConfig(BaseModel):
max_bytes: int = 100_000_000
content_length_slack_ratio: float = 1.05
stream_chunk_bytes: int = 1_048_576
# orphan cleanup (`*.uploading` — 크래시/abort 후 잔존물)
orphan_max_age_sec: int = 3600
cleanup_warn_threshold: int = 10
class AIModelConfig(BaseModel):
endpoint: str
model: str
max_tokens: int = 4096
timeout: int = 60
daily_budget_usd: float | None = None
require_explicit_trigger: bool = False
# B-0: 4B/26B 에 부여한 실사용 컨텍스트 상한 (char). triage=120k, primary=260k.
# classify_worker 가 에스컬레이션 판정 시 참고. 0/None 이면 상한 무시.
context_char_limit: int | None = None
class DeepSummaryBacklogConfig(BaseModel):
"""B-1 R2 — deep_summary enqueue 폭발 억제 임계치."""
ratio_threshold: float = 0.3 # 지난 window 의 deep_n/classify_n
pending_threshold: int = 5 # deep_summary pending+processing
window_minutes: int = 30
class SearchAskBackendConfig(BaseModel):
"""PR-2 of DS AI routing policy ([[document-server-ai-routing-policy]], 2026-05-23):
/api/search/ask backend dispatcher 가 llm-router :8890 단일 경유.
- backend 미지정 / "gemma-macmini" / "mac-mini-default" → router 가 tier_b
- backend "qwen-macbook" → router 가 named upstream (M5 Max)
- backend "claude-cloud" → router 가 503 명시 (scaffold)
- backend "auto" → router 의 rule + LLM triage
Unavailable → BackendUnavailable → 503 명시 (silent fallback 0).
Rollback: DS_BACKENDS_VIA_ROUTER=false 로 legacy 직접 호출 path.
legacy macmini_url / macbook_url / macbook_model 은 fallback 시만 사용.
"""
# PR-2 신규: llm-router URL. 비면 env LLM_ROUTER_URL 또는 hardcoded default.
router_url: str = ""
# Legacy fields (DS_BACKENDS_VIA_ROUTER=false 시만 사용)
macmini_url: str = "http://100.76.254.116:8801"
macbook_url: str = "http://100.118.112.84:8810"
macbook_model: str = "mlx-community/Qwen3.6-27B-8bit"
timeout_connect_s: int = 5
timeout_read_s: int = 60
class SearchAskReactConfig(BaseModel):
"""PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react ReAct loop.
qwen-macbook only (endpoint 자체가 implicit opt-in). G0-2 counter semantics:
max_tool_rounds=2 → LLM 호출 최대 3회 (tool round 2 + final 1), search 실행 최대 2회.
"""
enabled: bool = True
max_tool_rounds: int = 2
search_tool_limit: int = 5
search_tool_mode: str = "hybrid"
class SearchAskConfig(BaseModel):
backend: SearchAskBackendConfig = SearchAskBackendConfig()
react: SearchAskReactConfig = SearchAskReactConfig()
class SearchConfig(BaseModel):
ask: SearchAskConfig = SearchAskConfig()
class AIConfig(BaseModel):
gateway_endpoint: str
# B-0: 3-tier routing. triage/primary = Mac mini 26B MLX (PR #20 endpoint 통합). fallback = Claude Sonnet 4 API.
triage: AIModelConfig
primary: AIModelConfig
fallback: AIModelConfig
premium: AIModelConfig
embedding: AIModelConfig
rerank: AIModelConfig
# Phase 3.5a: answerability classifier (optional — 없으면 score-only gate). PR #20 이후 Mac mini 26B MLX endpoint (initial = exaone3.5).
classifier: AIModelConfig | None = None
# Phase 3.5b: semantic verifier (optional — 없으면 grounding-only). PR #20 이후 Mac mini 26B MLX endpoint (initial = exaone3.5).
verifier: AIModelConfig | None = None
# Legacy: vision 슬롯 (현재 사용처 0 — Document Server 는 OCR/STT 별도 서비스).
# 제거 진행 중이므로 optional 로 관대한 로딩 유지.
vision: AIModelConfig | None = None
# B-1 R2: backlog guard 임계치
deep_summary_backlog: DeepSummaryBacklogConfig = DeepSummaryBacklogConfig()
class Settings(BaseModel):
# DB
database_url: str = ""
# AI
ai: AIConfig | None = None
# PR-MacBook-RAG-Backend-1: /api/search/ask backend dispatcher
search: SearchConfig = SearchConfig()
# NAS
nas_mount_path: str = "/documents"
nas_pkm_root: str = "/documents/PKM"
# 인증
jwt_secret: str = ""
totp_secret: str = ""
# Phase 3.5: eval runner shared secret — X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증.
# 비어있으면 모든 eval 헤더 거부 (부재 = 비활성).
eval_runner_token: str = ""
# kordoc
kordoc_endpoint: str = "http://kordoc-service:3100"
# OCR (Surya)
ocr_endpoint: str = "http://ocr-service:3200"
# STT (faster-whisper, §3)
stt_endpoint: str = "http://stt-service:3300"
# §3 file_watcher: Roon 음원 경로 (prefix match 로 skip).
# 빈 문자열이면 skip 없음. 예: "/documents/PKM/../Music/roon-library" 또는
# NFS 경유 별도 마운트된 Roon 라이브러리.
roon_library_path: str = ""
# KGS Code 등 외부 작성 마크다운 자료 추가 스캔 경로 (PKM 상대 경로, 쉼표 구분).
# env: ADDITIONAL_WATCH_TARGETS=Knowledge/Industrial_Safety/가스기사/KGS_Code,...
# 모두 expected_category="library" 로 처리 (md/pdf/docx 등 문서 확장자만 수락).
# Inbox/Recordings/Videos 기본 스캔 외에 추가만 허용.
additional_watch_targets: list[str] = []
# 분류 체계
taxonomy: dict = {}
document_types: list[str] = []
# 업로드 한도 (authoritative policy)
upload: UploadConfig = UploadConfig()
# PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
# GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거.
study_explanation_enabled: bool = True
# internal endpoint Bearer token (Mac mini derived-worker 호출용)
internal_worker_token: str = ""
def load_settings() -> Settings:
"""config.yaml + 환경변수에서 설정 로딩"""
# 환경변수 (docker-compose에서 주입)
database_url = os.getenv("DATABASE_URL", "")
study_explanation_enabled = os.getenv("STUDY_EXPLANATION_ENABLED", "true").lower() in ("1", "true", "yes")
internal_worker_token = os.getenv("INTERNAL_WORKER_TOKEN", "")
jwt_secret = os.getenv("JWT_SECRET", "")
totp_secret = os.getenv("TOTP_SECRET", "")
eval_runner_token = os.getenv("EVAL_RUNNER_TOKEN", "")
kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100")
ocr_endpoint = os.getenv("OCR_ENDPOINT", "http://ocr-service:3200")
stt_endpoint = os.getenv("STT_ENDPOINT", "http://stt-service:3300")
roon_library_path = os.getenv("ROON_LIBRARY_PATH", "")
# ADDITIONAL_WATCH_TARGETS — 쉼표 구분 (공백 제거)
awt_raw = os.getenv("ADDITIONAL_WATCH_TARGETS", "")
additional_watch_targets = [p.strip() for p in awt_raw.split(",") if p.strip()]
# config.yaml — Docker 컨테이너 내부(/app/config.yaml) 또는 프로젝트 루트
config_path = Path("/app/config.yaml")
if not config_path.exists():
config_path = Path(__file__).parent.parent.parent / "config.yaml"
ai_config = None
nas_mount = "/documents"
nas_pkm = "/documents/PKM"
if config_path.exists():
with open(config_path) as f:
raw = yaml.safe_load(f)
if "ai" in raw:
ai_raw = raw["ai"]
models = ai_raw.get("models", {})
# B-0: triage 는 config.yaml 에 없을 수도 있는 신규 슬롯. 구버전 호환을 위해
# 없으면 fallback 를 triage 로 대체 (동일 모델 재사용).
triage_raw = models.get("triage") or models.get("fallback")
if triage_raw is None:
raise ValueError("config.yaml: ai.models.triage (or fallback) required")
ai_config = AIConfig(
gateway_endpoint=ai_raw.get("gateway", {}).get("endpoint", ""),
triage=AIModelConfig(**triage_raw),
primary=AIModelConfig(**models["primary"]),
fallback=AIModelConfig(**models["fallback"]),
premium=AIModelConfig(**models["premium"]),
embedding=AIModelConfig(**models["embedding"]),
rerank=AIModelConfig(**models["rerank"]),
vision=(AIModelConfig(**models["vision"]) if "vision" in models else None),
classifier=(
AIModelConfig(**models["classifier"]) if "classifier" in models else None
),
verifier=(
AIModelConfig(**models["verifier"]) if "verifier" in models else None
),
deep_summary_backlog=DeepSummaryBacklogConfig(
**ai_raw.get("deep_summary_backlog", {})
),
)
if "nas" in raw:
nas_mount = raw["nas"].get("mount_path", nas_mount)
nas_pkm = raw["nas"].get("pkm_root", nas_pkm)
search_cfg = SearchConfig()
if config_path.exists() and raw and "search" in raw:
ask_raw = (raw.get("search") or {}).get("ask", {}) or {}
sb = ask_raw.get("backend", {}) or {}
sr = ask_raw.get("react", {}) or {}
search_cfg = SearchConfig(
ask=SearchAskConfig(
backend=SearchAskBackendConfig(**sb),
react=SearchAskReactConfig(**sr),
)
)
taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
document_types = raw.get("document_types", []) if config_path.exists() and raw else []
upload_cfg = (
UploadConfig(**raw["upload"])
if config_path.exists() and raw and "upload" in raw
else UploadConfig()
)
return Settings(
database_url=database_url,
ai=ai_config,
search=search_cfg,
nas_mount_path=nas_mount,
nas_pkm_root=nas_pkm,
jwt_secret=jwt_secret,
totp_secret=totp_secret,
eval_runner_token=eval_runner_token,
kordoc_endpoint=kordoc_endpoint,
ocr_endpoint=ocr_endpoint,
stt_endpoint=stt_endpoint,
roon_library_path=roon_library_path,
additional_watch_targets=additional_watch_targets,
taxonomy=taxonomy,
document_types=document_types,
upload=upload_cfg,
study_explanation_enabled=study_explanation_enabled,
internal_worker_token=internal_worker_token,
)
settings = load_settings()