diff --git a/CLAUDE.md b/CLAUDE.md index ca24504..3eb2660 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,20 @@ # hyungi_Document_Server — Claude Code 작업 가이드 +## Infrastructure Reference 📌 + +**Always refer to** `~/.claude/projects/-Users-hyungiahn/memory/infra_inventory.md` for: +- AI model routing (primary / fallback / embedding / rerank / vision) — **the model names below may be stale** +- Machine info, Tailscale IPs, SSH targets +- Docker container topology and compose projects +- Drift log (known Desired vs Actual inconsistencies) +- Verify commands + +**If this file and `infra_inventory.md` disagree, `infra_inventory.md` is authoritative.** Do not change `config.yaml` / `credentials.env` without first updating `infra_inventory.md`. + +**Search experiment soft lock**: During Phase 2 work (search.py refactor, QueryAnalyzer, run_eval.py execution), do **not** run `docker compose restart`, change `config.yaml`, or pull Ollama models. Violating this invalidates the experiment baseline. + +--- + ## 프로젝트 개요 Self-hosted PKM(Personal Knowledge Management) 웹 애플리케이션. diff --git a/app/services/search/fusion_service.py b/app/services/search/fusion_service.py index 7e1e3f5..5c857bf 100644 --- a/app/services/search/fusion_service.py +++ b/app/services/search/fusion_service.py @@ -224,22 +224,23 @@ def get_strategy(name: str) -> FusionStrategy: SOFT_FILTER_MAX_BOOST = 0.05 # plan 룰 (CRITICAL) # ↑ RRF score는 0.01~0.05 범위 (k=60). 상한 초과 시 기존 랭킹 왜곡. # 기존 RRFWithBoost의 legal article boost(0.05)와 동일 최대값 → 일관성. -SOFT_FILTER_DOMAIN_BOOST = 0.03 -SOFT_FILTER_DOCTYPE_BOOST = 0.02 +SOFT_FILTER_DOMAIN_BOOST = 0.01 # 2026-04-08 실측: 0.03은 exact_keyword -0.03 악화 +# ↑ 낮게 잡는 이유: soft_filter는 "같은 도메인 doc을 동등하게 boost" → exact match +# doc의 상대 우위가 손상됨. 0.01 수준이면 fusion 내부 순위 역전 확률 최소. def apply_soft_filter_boost( results: list["SearchResult"], soft_filters: dict | None, ) -> int: - """Phase 2.3 — QueryAnalyzer soft_filters 기반 score boost. 
+ """Phase 2.3 — QueryAnalyzer soft_filters.domain 기반 약한 score boost. - ai_domain / ai_tags 매칭 시 소량 boost 적용. 총 boost는 - SOFT_FILTER_MAX_BOOST(0.05) 상한을 넘지 않음. + ai_domain 정확 매칭 시 SOFT_FILTER_DOMAIN_BOOST(0.01) 1회 가산. + document_type 매칭은 v0.1 평가셋에서 효과 측정 불가 + false positive 많음 → 제외. Args: results: fusion 직후 SearchResult 리스트 (in-place 수정) - soft_filters: query_analysis.soft_filters = {"domain": [...], "document_type": [...]} + soft_filters: query_analysis.soft_filters = {"domain": [...]} Returns: int — boost 적용된 결과 개수 (debug/notes용) @@ -247,35 +248,26 @@ def apply_soft_filter_boost( if not soft_filters: return 0 domain_list = [str(d).lower() for d in soft_filters.get("domain", []) or []] - doctype_list = [str(t).lower() for t in soft_filters.get("document_type", []) or []] - if not domain_list and not doctype_list: + if not domain_list: return 0 boosted_count = 0 for r in results: - boost = 0.0 - - # domain 매칭 — ai_domain 부분 문자열 매칭 (Industrial_Safety/Legislation 같은 경로 매칭) - if domain_list and r.ai_domain: - ai_dom_lower = r.ai_domain.lower() - for d in domain_list: - if d in ai_dom_lower or ai_dom_lower in d: - boost += SOFT_FILTER_DOMAIN_BOOST - break # 한 번만 - - # document_type 매칭 — ai_tags JSON 문자열 또는 ai_domain 내 keyword 탐지 - # (ai_domain에 "Law_Document"는 안 들어감. ai_tags에 law/law_document 같은 태그가 있음.) 
- # 간단화: ai_domain 경로에 keyword가 포함되면 매칭 - if doctype_list: - hay = (r.ai_domain or "").lower() + " " + (getattr(r, "match_reason", "") or "").lower() - for t in doctype_list: - if t in hay or any(word in hay for word in t.split("_")): - boost += SOFT_FILTER_DOCTYPE_BOOST - break - - if boost > 0: - boost = min(boost, SOFT_FILTER_MAX_BOOST) - r.score += boost + if not r.ai_domain: + continue + ai_dom_lower = r.ai_domain.lower() + # Exact match, or path-level match only when the filter value itself contains "/" (a bare "industrial_safety" filter does NOT match "Industrial_Safety/Legislation") + matched = False + for d in domain_list: + if d == ai_dom_lower: + matched = True + break + # path 레벨 매칭: "industrial_safety/legislation" in "industrial_safety/legislation/act" + if d in ai_dom_lower and "/" in d: + matched = True + break + if matched: + r.score += min(SOFT_FILTER_DOMAIN_BOOST, SOFT_FILTER_MAX_BOOST) + boosted_count += 1 # boost 적용 후 재정렬