feat(ask): Phase 3.5 A0 — ask_events source/eval_case_id + eval auth boundary

- migrations 138~142: source TEXT DEFAULT 'document_server' + eval_case_id TEXT
  추가, 인덱스 2개, backfill, 1주 관찰 후 NOT NULL (142 적용 분리)
- app/models/ask_event.py: source / eval_case_id ORM 필드 (138~141 단계 nullable)
- app/services/search_telemetry.py: record_ask_event 시그니처에 source / eval_case_id
- app/core/config.py: settings.eval_runner_token + EVAL_RUNNER_TOKEN env 로드
- app/api/search.py:
  - X-Source / X-Eval-Case-Id / X-Eval-Token 헤더 수신
  - _resolve_eval_identity(): hmac.compare_digest 로 token 검증, 실패 시 source
    'document_server' 강등 + warning log + eval_case_id=None
  - 두 record_ask_event 호출에 검증된 source/eval_case_id 전달
- credentials.env.example: EVAL_RUNNER_TOKEN= (empty default = 모든 eval claim 거부)
- tests/test_ask_eval_auth.py: 9 케이스 — token 없음/틀림/일치, env 미설정,
  case_id only, non-eval source forces case_id None

trust boundary: 일반 client 의 X-Source=eval / X-Eval-Case-Id 시도는 무시되어
calibration telemetry 오염 불가. eval runner 만 EVAL_RUNNER_TOKEN 으로 인증.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-17 08:00:36 +09:00
committed by hyungi
parent 0807574986
commit 09883d0358
11 changed files with 185 additions and 1 deletions
+62 -1
View File
@@ -10,17 +10,20 @@
"""
import asyncio
import hmac
import time
from typing import Annotated, Literal
from fastapi import APIRouter, BackgroundTasks, Depends, Query
from fastapi import APIRouter, BackgroundTasks, Depends, Header, Query
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user
from core.config import settings
from core.database import get_session
from core.utils import setup_logger
from models.user import User
from services.document_telemetry import sanitize_source
from services.search.classifier_service import ClassifierResult, classify
from services.search.evidence_service import EvidenceItem, extract_evidence
from services.search.fusion_service import DEFAULT_FUSION
@@ -367,6 +370,48 @@ def _build_ask_debug(
)
def _resolve_eval_identity(
    x_source: str | None,
    x_eval_case_id: str | None,
    x_eval_token: str | None,
) -> tuple[str, str | None]:
    """Validate the X-Source / X-Eval-Case-Id trust boundary (Phase 3.5 fix2).

    Rules:
    - A request is an "eval claim" when the sanitized X-Source equals 'eval'
      or a non-empty X-Eval-Case-Id is present.
    - Non-eval requests keep their sanitized source and always get
      eval_case_id=None (a case id is meaningless unless source == 'eval').
    - An eval claim is accepted only when X-Eval-Token equals
      settings.eval_runner_token. The comparison is constant-time, and an
      empty/unset server-side token rejects every claim.
    - On rejection the eval headers are ignored and a warning is logged:
      a claimed 'eval' source is downgraded to 'document_server', any other
      sanitized source is kept, and eval_case_id is None.
    - On acceptance the result is ('eval', x_eval_case_id).

    Args:
        x_source: Raw X-Source header value, or None when absent.
        x_eval_case_id: Raw X-Eval-Case-Id header value, or None when absent.
        x_eval_token: Raw X-Eval-Token header value, or None when absent.

    Returns:
        A (source, eval_case_id) tuple that is safe to persist as telemetry.
    """
    claimed_source = sanitize_source(x_source)
    is_eval_claim = claimed_source == "eval" or bool(x_eval_case_id)
    if not is_eval_claim:
        # Ordinary call: never carry a case id for a non-eval source.
        return claimed_source, None

    # Eval claim: verify the shared secret.
    expected = settings.eval_runner_token
    presented = x_eval_token or ""
    # Compare as bytes. hmac.compare_digest() raises TypeError for str
    # arguments containing non-ASCII characters, and the presented token is
    # client-controlled, so a str comparison would let a malformed header
    # crash the request instead of being rejected. "surrogatepass" keeps the
    # encoding total (never raises) and injective (no two distinct strings
    # encode to the same bytes).
    token_valid = bool(expected) and hmac.compare_digest(
        presented.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )
    if not token_valid:
        logger.warning(
            "eval header rejected: source=%s case_id=%s token_present=%s expected_set=%s",
            x_source, x_eval_case_id, bool(x_eval_token), bool(expected),
        )
        # Downgrade to an ordinary call: an 'eval' source claim falls back to
        # the default 'document_server'; any other sanitized source is kept.
        # The case id is dropped either way.
        if claimed_source == "eval":
            return "document_server", None
        return claimed_source, None

    # Token verified: accept the eval label and the case id as presented.
    return "eval", x_eval_case_id
@router.get("/ask", response_model=AskResponse)
async def ask(
q: str,
@@ -375,14 +420,24 @@ async def ask(
background_tasks: BackgroundTasks,
limit: int = Query(10, ge=1, le=20, description="synthesis 입력 상한"),
debug: bool = Query(False, description="evidence/synthesis 중간 상태 노출"),
x_source: Annotated[str | None, Header(alias="X-Source")] = None,
x_eval_case_id: Annotated[str | None, Header(alias="X-Eval-Case-Id")] = None,
x_eval_token: Annotated[str | None, Header(alias="X-Eval-Token")] = None,
):
"""근거 기반 AI 답변 (Phase 3.5a).
Phase 3.3 기반 + classifier parallel + refusal gate + grounding re-gate.
실패 경로에서도 `results` 는 항상 반환.
Phase 3.5 calibration trust boundary (fix2):
- X-Source / X-Eval-Case-Id 는 X-Eval-Token 이 EVAL_RUNNER_TOKEN 와 일치하는
trusted internal eval runner 에서만 수용된다.
- 일반 client 의 X-Source=eval 시도는 무시되고 source='document_server' 로 강제.
- source != 'eval' 이면 eval_case_id 항상 None.
"""
t_total = time.perf_counter()
defense_log: dict = {} # per-layer flag snapshot
source, eval_case_id = _resolve_eval_identity(x_source, x_eval_case_id, x_eval_token)
# 1. 검색 파이프라인
pr = await run_search(
@@ -500,6 +555,9 @@ async def ask(
missing_aspects=classifier_result.missing_aspects or None,
model_name=resolve_primary_model(),
prompt_version=ASK_PROMPT_VERSION,
# Phase 3.5 calibration
source=source,
eval_case_id=eval_case_id,
)
debug_obj = None
if debug:
@@ -697,6 +755,9 @@ async def ask(
missing_aspects=missing_aspects,
model_name=resolve_primary_model(),
prompt_version=ASK_PROMPT_VERSION,
# Phase 3.5 calibration
source=source,
eval_case_id=eval_case_id,
)
debug_obj = None
+6
View File
@@ -51,6 +51,10 @@ class Settings(BaseModel):
jwt_secret: str = ""
totp_secret: str = ""
# Phase 3.5: eval runner shared secret — X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증.
# 비어있으면 모든 eval 헤더 거부 (부재 = 비활성).
eval_runner_token: str = ""
# kordoc
kordoc_endpoint: str = "http://kordoc-service:3100"
@@ -71,6 +75,7 @@ def load_settings() -> Settings:
database_url = os.getenv("DATABASE_URL", "")
jwt_secret = os.getenv("JWT_SECRET", "")
totp_secret = os.getenv("TOTP_SECRET", "")
eval_runner_token = os.getenv("EVAL_RUNNER_TOKEN", "")
kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100")
ocr_endpoint = os.getenv("OCR_ENDPOINT", "http://ocr-service:3200")
@@ -127,6 +132,7 @@ def load_settings() -> Settings:
nas_pkm_root=nas_pkm,
jwt_secret=jwt_secret,
totp_secret=totp_secret,
eval_runner_token=eval_runner_token,
kordoc_endpoint=kordoc_endpoint,
ocr_endpoint=ocr_endpoint,
taxonomy=taxonomy,
+4
View File
@@ -39,6 +39,10 @@ class AskEvent(Base):
missing_aspects: Mapped[list[Any] | None] = mapped_column(JSONB)
model_name: Mapped[str | None] = mapped_column(Text)
prompt_version: Mapped[str | None] = mapped_column(Text)
# Phase 3.5 calibration: eval/production 분리 + golden join 키
# 138~141 단계: nullable. 142 적용 후 source 는 NOT NULL (DB 강제, 앱은 항상 채움).
source: Mapped[str | None] = mapped_column(Text)
eval_case_id: Mapped[str | None] = mapped_column(Text)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now, nullable=False
)
+9
View File
@@ -333,6 +333,9 @@ async def record_ask_event(
missing_aspects: list[str] | None = None,
model_name: str | None = None,
prompt_version: str | None = None,
# Phase 3.5 calibration: source 분리 + golden join
source: str | None = None,
eval_case_id: str | None = None,
) -> None:
"""ask_events INSERT. background task에서 호출 — 에러 삼킴.
@@ -341,6 +344,10 @@ async def record_ask_event(
- covered_aspects / missing_aspects: classifier 결과 그대로
- model_name: resolve_primary_model() 또는 호출사이트 명시
- prompt_version: ASK_PROMPT_VERSION 상수
Phase 3.5 calibration:
- source: sanitize_source(X-Source 헤더) eval/ui_search/ui_detail/...
- eval_case_id: X-Eval-Case-Id 헤더 (eval 호출만 채움)
"""
try:
async with async_session() as session:
@@ -364,6 +371,8 @@ async def record_ask_event(
missing_aspects=missing_aspects,
model_name=model_name,
prompt_version=prompt_version,
source=source,
eval_case_id=eval_case_id,
)
session.add(row)
await session.commit()
+7
View File
@@ -50,3 +50,10 @@ NYT_API_KEY=
# ─── 국가법령정보센터 (법령 모니터링) ───
LAW_OC=
# ─── Phase 3.5 fix2: eval runner shared secret ───
# /ask 엔드포인트의 X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증 토큰.
# 비어있거나 클라이언트 X-Eval-Token 와 불일치 시 eval 헤더 거부 (warning log + source='document_server' 강등).
# 충분히 긴 random secret 권장 (예: openssl rand -hex 32).
# scripts/run_eval_ask.py runner 가 동일 값을 X-Eval-Token 헤더로 전송해야 eval telemetry 적재됨.
EVAL_RUNNER_TOKEN=
@@ -0,0 +1 @@
-- Add the Phase 3.5 calibration columns to ask_events: `source` (defaults to 'document_server') and `eval_case_id`; IF NOT EXISTS makes the statement safe to re-run.
ALTER TABLE ask_events ADD COLUMN IF NOT EXISTS source TEXT DEFAULT 'document_server', ADD COLUMN IF NOT EXISTS eval_case_id TEXT
@@ -0,0 +1 @@
-- Composite index on (source, created_at DESC); presumably serves per-source, newest-first queries over ask_events (confirm against the calibration queries).
CREATE INDEX IF NOT EXISTS idx_ask_events_source_created ON ask_events(source, created_at DESC)
@@ -0,0 +1 @@
-- Partial index on eval_case_id: only rows with a non-NULL eval_case_id are indexed.
CREATE INDEX IF NOT EXISTS idx_ask_events_eval_case_id ON ask_events(eval_case_id) WHERE eval_case_id IS NOT NULL
@@ -0,0 +1 @@
-- Backfill: give every row that still has a NULL source the default 'document_server' label.
UPDATE ask_events SET source = 'document_server' WHERE source IS NULL
@@ -0,0 +1 @@
-- Enforce NOT NULL on source. This fails if any row still has a NULL source, so it must run after the backfill UPDATE.
ALTER TABLE ask_events ALTER COLUMN source SET NOT NULL
+92
View File
@@ -0,0 +1,92 @@
"""Phase 3.5 fix2: /ask 의 X-Source / X-Eval-Case-Id trust boundary.
`_resolve_eval_identity()` 단위 테스트.
- token 없음/틀림 + X-Source=eval source='document_server', eval_case_id=None
- token 일치 + X-Source=eval + X-Eval-Case-Id=case_xxx ('eval', 'case_xxx')
- token 틀림 + X-Eval-Case-Id (X-Source 미지정) eval_case_id=None
- 일반 호출 (X-Source=ui_search, no eval headers) ('ui_search', None)
- env 미설정 (eval_runner_token='') 모든 eval claim 거부
"""
from __future__ import annotations
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app"))
import pytest
@pytest.fixture
def resolve_with_token(monkeypatch):
    """Provide a factory that pins settings.eval_runner_token for one test.

    The returned callable takes the server-side token value, patches it onto
    the settings object used by ``api.search`` and returns
    ``_resolve_eval_identity`` so the test can call the resolver directly.
    The patch goes through ``monkeypatch``, so pytest undoes it when the test
    finishes.
    """

    def _make(token: str):
        # Imported lazily so the module is only loaded when a test asks for it
        # (the sys.path tweak at the top of this file has run by then).
        from api import search as search_mod

        # Patch the attribute on the settings object that api.search holds,
        # which is the object _resolve_eval_identity reads at call time.
        monkeypatch.setattr(search_mod.settings, "eval_runner_token", token)
        return search_mod._resolve_eval_identity

    return _make
def test_no_token_no_eval_headers_default(resolve_with_token):
    """A plain call without any eval headers resolves to the default source."""
    outcome = resolve_with_token("secret123")(None, None, None)
    assert outcome == ("document_server", None)
def test_normal_source_with_token(resolve_with_token):
    """A ui_search call is not an eval claim, so the token plays no role."""
    expected = ("ui_search", None)
    resolver = resolve_with_token("secret123")
    assert resolver("ui_search", None, None) == expected
def test_eval_claim_no_token_rejected(resolve_with_token):
    """X-Source=eval without a token is rejected and downgraded to 'document_server'."""
    resolver = resolve_with_token("secret123")
    outcome = resolver("eval", "case_001", None)
    assert outcome == ("document_server", None)
def test_eval_claim_wrong_token_rejected(resolve_with_token):
    """An eval claim presenting the wrong token is rejected."""
    rejected = ("document_server", None)
    assert resolve_with_token("secret123")("eval", "case_001", "wrong_token") == rejected
def test_eval_claim_correct_token_accepted(resolve_with_token):
    """A matching token yields the 'eval' source together with the case id."""
    secret = "secret123"
    resolver = resolve_with_token(secret)
    assert resolver("eval", "case_001", secret) == ("eval", "case_001")
def test_eval_case_id_only_no_source_no_token(resolve_with_token):
    """Only X-Eval-Case-Id is sent, with no token: rejected, case id dropped."""
    resolver = resolve_with_token("secret123")
    outcome = resolver(None, "case_001", None)
    assert outcome == ("document_server", None)
def test_eval_case_id_only_wrong_token(resolve_with_token):
    """Only X-Eval-Case-Id is sent, with a wrong token: rejected."""
    rejected = ("document_server", None)
    resolver = resolve_with_token("secret123")
    assert resolver(None, "case_001", "wrong") == rejected
def test_env_unset_rejects_even_correct_format(resolve_with_token):
    """With settings.eval_runner_token='' every eval claim is rejected."""
    resolver = resolve_with_token("")
    # Whatever the client presents (even an empty token that would "equal"
    # the empty server value), an unset server-side secret must reject it.
    for presented in ("", "anything"):
        assert resolver("eval", "case_001", presented) == ("document_server", None)
def test_non_eval_source_forces_case_id_none(resolve_with_token):
    """X-Source=ui_detail sent together with X-Eval-Case-Id drops the case id.

    The presence of a case id makes the request an eval claim even though the
    source is not 'eval'. With no token the claim is rejected; because the
    claimed source is not 'eval', the rejection branch keeps 'ui_detail' and
    only forces eval_case_id to None.
    """
    resolver = resolve_with_token("secret123")
    outcome = resolver("ui_detail", "case_001", None)
    assert outcome == ("ui_detail", None)