feat(search): domain_bucket scope filter on AxisFilter (include/exclude)
검색 retrieval 에 domain_bucket(377) 포함/제외 필터 추가. - AxisFilter.domain_buckets(= ANY) / exclude_buckets(<> ALL) + active() - _axis_sql 2절 — 전 leg documents alias(d / chunk df JOIN) 경유, 미지정시 byte-불변(무회귀) - search.py: domain_bucket / exclude_bucket Query 파라미터(CSV) 검증: exclude_bucket=News → News 0건(금리 10→0·인공지능 15→0·반도체 11→0), domain_bucket=Safety → Knowledge/Industrial_Safety 드리프트까지 정규화 포함. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -211,6 +211,8 @@ async def search(
|
||||
None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"),
|
||||
year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"),
|
||||
year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"),
|
||||
domain_bucket: str | None = Query(None, description="377: domain_bucket 스코프 CSV (Safety,Engineering,Law,Philosophy,Programming,General,News). domain_bucket = ANY"),
|
||||
exclude_bucket: str | None = Query(None, description="377: domain_bucket 제외 CSV (예: News). 지식질의 시 News 기본제외용"),
|
||||
facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"),
|
||||
):
|
||||
"""문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)"""
|
||||
@@ -221,6 +223,8 @@ async def search(
|
||||
jurisdiction=jurisdiction,
|
||||
year_from=year_from,
|
||||
year_to=year_to,
|
||||
domain_buckets=[b.strip() for b in domain_bucket.split(",") if b.strip()] if domain_bucket else None,
|
||||
exclude_buckets=[b.strip() for b in exclude_bucket.split(",") if b.strip()] if exclude_bucket else None,
|
||||
)
|
||||
pr = await run_search(
|
||||
session,
|
||||
|
||||
@@ -76,10 +76,13 @@ class AxisFilter:
|
||||
jurisdiction: str | None = None
|
||||
year_from: int | None = None
|
||||
year_to: int | None = None
|
||||
domain_buckets: list[str] | None = None # 377: domain_bucket = ANY (도메인 스코프)
|
||||
exclude_buckets: list[str] | None = None # 377: domain_bucket <> ALL (예: News 제외)
|
||||
|
||||
def active(self) -> bool:
|
||||
return bool(self.material_types or self.jurisdiction
|
||||
or self.year_from is not None or self.year_to is not None)
|
||||
or self.year_from is not None or self.year_to is not None
|
||||
or self.domain_buckets or self.exclude_buckets)
|
||||
|
||||
|
||||
def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
|
||||
@@ -104,6 +107,12 @@ def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
|
||||
if af.year_to is not None:
|
||||
cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)")
|
||||
params["af_yt"] = af.year_to
|
||||
if af.domain_buckets:
|
||||
cl.append(f"{p}domain_bucket = ANY(:af_db)")
|
||||
params["af_db"] = af.domain_buckets
|
||||
if af.exclude_buckets:
|
||||
cl.append(f"{p}domain_bucket <> ALL(:af_xdb)")
|
||||
params["af_xdb"] = af.exclude_buckets
|
||||
return " AND " + " AND ".join(cl)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user