From 0c7211e24b0cc835fea3e30457eac8ed7e23bf6d Mon Sep 17 00:00:00 2001 From: hyungi Date: Mon, 29 Jun 2026 04:35:12 +0000 Subject: [PATCH] feat(search): domain_bucket scope filter on AxisFilter (include/exclude) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 검색 retrieval 에 domain_bucket(377) 포함/제외 필터 추가. - AxisFilter.domain_buckets(= ANY) / exclude_buckets(<> ALL) + active() - _axis_sql 2절 — 전 leg documents alias(d / chunk df JOIN) 경유, 미지정시 byte-불변(무회귀) - search.py: domain_bucket / exclude_bucket Query 파라미터(CSV) 검증: exclude_bucket=News → News 0건(금리 10→0·인공지능 15→0·반도체 11→0), domain_bucket=Safety → Knowledge/Industrial_Safety 드리프트까지 정규화 포함. Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/search.py | 4 ++++ app/services/search/retrieval_service.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/app/api/search.py b/app/api/search.py index c4fb1c7..50add2b 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -211,6 +211,8 @@ async def search( None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"), year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"), year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"), + domain_bucket: str | None = Query(None, description="377: domain_bucket 스코프 CSV (Safety,Engineering,Law,Philosophy,Programming,General,News). domain_bucket = ANY"), + exclude_bucket: str | None = Query(None, description="377: domain_bucket 제외 CSV (예: News). 지식질의 시 News 기본제외용"), facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"), ): """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)""" @@ -221,6 +223,8 @@ async def search( jurisdiction=jurisdiction, year_from=year_from, year_to=year_to, + domain_buckets=[b.strip() for b in domain_bucket.split(",") if b.strip()] if domain_bucket else None, + exclude_buckets=[b.strip() for b in exclude_bucket.split(",") if b.strip()] if exclude_bucket else None, ) pr = await run_search( session, diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py index 4f22eec..936ed1e 100644 --- a/app/services/search/retrieval_service.py +++ b/app/services/search/retrieval_service.py @@ -76,10 +76,13 @@ class AxisFilter: jurisdiction: str | None = None year_from: int | None = None year_to: int | None = None + domain_buckets: list[str] | None = None # 377: domain_bucket = ANY (도메인 스코프) + exclude_buckets: list[str] | None = None # 377: domain_bucket <> ALL (예: News 제외) def active(self) -> bool: return bool(self.material_types or self.jurisdiction - or self.year_from is not None or self.year_to is not None) + or self.year_from is not None or self.year_to is not None + or self.domain_buckets or self.exclude_buckets) def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str: @@ -104,6 +107,12 @@ def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str: if af.year_to is not None: cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)") params["af_yt"] = af.year_to + if af.domain_buckets: + cl.append(f"{p}domain_bucket = ANY(:af_db)") + params["af_db"] = af.domain_buckets + if af.exclude_buckets: + cl.append(f"{p}domain_bucket <> ALL(:af_xdb)") + params["af_xdb"] = af.exclude_buckets return " AND " + " AND ".join(cl)