feat(search): domain_bucket scope filter on AxisFilter (include/exclude)

검색 retrieval 에 domain_bucket(377) 포함/제외 필터 추가.
- AxisFilter.domain_buckets(= ANY) / exclude_buckets(<> ALL) + active()
- _axis_sql 에 조건절 2개 추가 — 모든 leg 에서 documents alias(d / chunk 의 df JOIN)를 경유해 적용, 필터 미지정 시 생성 SQL 은 byte 단위 불변(무회귀)
- search.py: domain_bucket / exclude_bucket Query 파라미터(CSV)
검증: exclude_bucket=News → News 0건(금리 10→0·인공지능 15→0·반도체 11→0),
domain_bucket=Safety → Knowledge/Industrial_Safety 드리프트까지 정규화 포함.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-06-29 04:35:12 +00:00
parent 94b172e314
commit 0c7211e24b
2 changed files with 14 additions and 1 deletion
+4
View File
@@ -211,6 +211,8 @@ async def search(
None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"), None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"),
year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"), year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"),
year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"), year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"),
domain_bucket: str | None = Query(None, description="377: domain_bucket 스코프 CSV (Safety,Engineering,Law,Philosophy,Programming,General,News). domain_bucket = ANY"),
exclude_bucket: str | None = Query(None, description="377: domain_bucket 제외 CSV (예: News). 지식질의 시 News 기본제외용"),
facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"), facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"),
): ):
"""문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)""" """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)"""
@@ -221,6 +223,8 @@ async def search(
jurisdiction=jurisdiction, jurisdiction=jurisdiction,
year_from=year_from, year_from=year_from,
year_to=year_to, year_to=year_to,
domain_buckets=[b.strip() for b in domain_bucket.split(",") if b.strip()] if domain_bucket else None,
exclude_buckets=[b.strip() for b in exclude_bucket.split(",") if b.strip()] if exclude_bucket else None,
) )
pr = await run_search( pr = await run_search(
session, session,
+10 -1
View File
@@ -76,10 +76,13 @@ class AxisFilter:
jurisdiction: str | None = None jurisdiction: str | None = None
year_from: int | None = None year_from: int | None = None
year_to: int | None = None year_to: int | None = None
domain_buckets: list[str] | None = None # 377: domain_bucket = ANY (도메인 스코프)
exclude_buckets: list[str] | None = None # 377: domain_bucket <> ALL (예: News 제외)
def active(self) -> bool: def active(self) -> bool:
return bool(self.material_types or self.jurisdiction return bool(self.material_types or self.jurisdiction
or self.year_from is not None or self.year_to is not None) or self.year_from is not None or self.year_to is not None
or self.domain_buckets or self.exclude_buckets)
def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str: def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
@@ -104,6 +107,12 @@ def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
if af.year_to is not None: if af.year_to is not None:
cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)") cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)")
params["af_yt"] = af.year_to params["af_yt"] = af.year_to
if af.domain_buckets:
cl.append(f"{p}domain_bucket = ANY(:af_db)")
params["af_db"] = af.domain_buckets
if af.exclude_buckets:
cl.append(f"{p}domain_bucket <> ALL(:af_xdb)")
params["af_xdb"] = af.exclude_buckets
return " AND " + " AND ".join(cl) return " AND " + " AND ".join(cl)