# hyungi_Document_Server configuration

ai:
  gateway:
    endpoint: "http://ai-gateway:8080"

  models:
    # ─── 2-tier routing (PR-B) ───
    # triage: always-on classification, summarization, and evidence selection.
    # GPU Ollama gemma-4b (Q8_0, ~11.6GB).
    # Concurrent requests are OK — no need to go through the llm_gate Semaphore.
    triage:
      endpoint: "http://ollama:11434/v1/chat/completions"
      model: "gemma4:e4b-it-q8_0"
      max_tokens: 4096
      timeout: 30
      context_char_limit: 120000

    # primary: escalation only. 26B MLX (Mac mini; guarded by Semaphore(1)).
    primary:
      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
      max_tokens: 8192
      timeout: 180
      context_char_limit: 260000

    # fallback: last line of defense when primary fails.
    # Same model as triage — degrading to gemma-4b is acceptable.
    fallback:
      endpoint: "http://ollama:11434/v1/chat/completions"
      model: "gemma4:e4b-it-q8_0"
      max_tokens: 4096
      timeout: 120

    premium:
      endpoint: "https://api.anthropic.com/v1/messages"
      model: "claude-sonnet-4-20250514"
      max_tokens: 8192
      daily_budget_usd: 5.00
      require_explicit_trigger: true

    embedding:
      endpoint: "http://ollama:11434/api/embeddings"
      model: "bge-m3"

    rerank:
      endpoint: "http://ollama:11434/api/rerank"
      model: "bge-reranker-v2-m3"

    # Phase 3.5a answerability classifier. The model is unified on gemma4:e4b
    # (reflects the removal of exaone).
    # classifier_service treats this section as optional via a hasattr check,
    # so removing it makes the classifier gate skip automatically (score-only).
    # Kept intentionally for now.
    classifier:
      endpoint: "http://ollama:11434/v1/chat/completions"
      model: "gemma4:e4b-it-q8_0"
      max_tokens: 512
      timeout: 10

    # Removed: vision (unused)

  # ─── Suppress deep_summary enqueue explosions (B-1 R2) ───
  # Before initial tuning, excessive soft escalation into the deep_summary
  # queue saturates the MLX 26B model.
  # If any one of the thresholds below is exceeded, soft escalation
  # (recommend_deep_summary only) is suppressed. Hard escalation
  # (long_context / triage_json_invalid / low_confidence) is never suppressed.
  # NOTE(review): this section was reconstructed from a whitespace-collapsed
  # file; it is assumed to nest under `ai` — confirm against the config loader
  # (it may instead be a top-level key).
  deep_summary_backlog:
    ratio_threshold: 0.3  # deep_n / classify_n over the past window
    pending_threshold: 5  # pending + processing in the deep_summary stage
    window_minutes: 30

nas:
  mount_path: "/documents"
  pkm_root: "/documents/PKM"

# ─── Upload limit policy (authoritative) ───
# Keep the proxy (home-caddy, etc.) request_body limit at or above
# max_bytes * content_length_slack_ratio.
upload:
  max_bytes: 100000000  # 100 MB (SI). Single source of truth for the actual upload limit.
  content_length_slack_ratio: 1.05  # Headroom for multipart form overhead (headers/boundaries).
  stream_chunk_bytes: 1048576  # Streaming read/write in 1 MiB chunks.

# ─── Document classification taxonomy ───
# NOTE(review): nesting was reconstructed — keys with list values are assumed
# to be subcategories of the preceding domain; confirm against the original.
taxonomy:
  Philosophy:
    Ethics: []
    Metaphysics: []
    Epistemology: []
    Logic: []
    Aesthetics: []
    Eastern_Philosophy: []
    Western_Philosophy: []

  Language:
    Korean: []
    English: []
    Japanese: []
    Translation: []
    Linguistics: []

  Engineering:
    Mechanical: [Piping, HVAC, Equipment]
    Electrical: [Power, Instrumentation]
    Chemical: [Process, Material]
    Civil: []
    Network: [Server, Security, Infrastructure]

  Industrial_Safety:
    Legislation: [
      Act, Decree, Foreign_Law, Korea_Law_Archive,
      Enforcement_Rule, Public_Notice, SAPA
    ]
    Theory: [Industrial_Safety_General, Safety_Health_Fundamentals]
    Academic_Papers: [Safety_General, Risk_Assessment_Research]
    Cases: [Domestic, International]
    Practice: [
      Checklist, Contractor_Management, Safety_Education, Emergency_Plan,
      Patrol_Inspection, Permit_to_Work, PPE, Safety_Plan
    ]
    Risk_Assessment: [KRAS, JSA, Checklist_Method]
    Safety_Manager: [Appointment, Duty_Record, Improvement, Inspection, Meeting]
    Health_Manager: [
      Appointment, Duty_Record, Ergonomics, Health_Checkup,
      Mental_Health, MSDS, Work_Environment
    ]

  Programming:
    Programming_Language: [Python, JavaScript, Go, Rust]
    Framework: [FastAPI, SvelteKit, React]
    DevOps: [Docker, CI_CD, Linux_Administration]
    AI_ML: [Large_Language_Model, Computer_Vision, Data_Science]
    Database: []
    Software_Architecture: []

  General:
    Reading_Notes: []
    Self_Development: []
    Business: []
    Science: []
    History: []

document_types:
  - Reference
  - Standard
  - Manual
  - Drawing
  - Template
  - Note
  - Academic_Paper
  - Law_Document
  - Report
  - Memo
  - Checklist
  - Meeting_Minutes
  - Specification
  - 발주서
  - 세금계산서
  - 명세표
  - 도면
  - 증명서
  - 계획서
  - 시방서

schedule:
  law_monitor: "07:00"
  mailplus_archive: ["07:00", "18:00"]
  daily_digest: "20:00"
  file_watcher_interval_minutes: 5
  queue_consumer_interval_minutes: 10