diff --git a/app/api/search.py b/app/api/search.py index 023ffc9..5d6f559 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -592,631 +592,3 @@ def _resolve_eval_identity( return "eval", x_eval_case_id -@router.get("/ask", response_model=AskResponse) -async def ask( - q: str, - user: Annotated[User, Depends(get_current_user)], - session: Annotated[AsyncSession, Depends(get_session)], - background_tasks: BackgroundTasks, - limit: int = Query(10, ge=1, le=20, description="synthesis 입력 상한"), - debug: bool = Query(False, description="evidence/synthesis 중간 상태 노출"), - backend: Annotated[ - str | None, - Query( - pattern="^(qwen-macbook|gemma-macmini|mac-mini-default|claude-cloud|auto)$", - description=( - "PR-2 of DS AI routing policy (2026-05-23) — 명시 backend opt-in via llm-router. " - "미지정 = mac-mini-default (gemma-macmini alias, default). " - "'mac-mini-default' = router 가 tier_b (Mac mini gemma-4-26b). " - "'qwen-macbook' = router 가 named upstream (M5 Max Qwen 3.6 27B). " - "'claude-cloud' = router 가 503 provider_not_configured (활성화 별 PR). " - "'auto' = router 의 rule + LLM triage. " - "backend unavailable 시 503 + error_reason=macbook_unavailable / router_* " - "(자동 fallback 없음 — 다시 호출하거나 backend 인자 제거 후 재시도)." - ), - ), - ] = None, - corpus_variant: str | None = Query( - None, - pattern=r"^(prehier|hier_sim_raw|hier_sim_clean)$", - description=( - "⚠️ EVAL-ONLY (Hier-PassageRAG-Diagnose-1). evidence retrieval 의 chunk leg 를 측정 뷰로 " - "교체 — prehier(legacy) | hier_sim_raw | hier_sim_clean. 운영 UI 미사용. " - "미지정 = production corpus_chunks (기존 /ask 동작 동일)." - ), - ), - exact_knn: bool = Query( - False, - description=( - "⚠️ EVAL-ONLY (Hier-PassageRAG-Diagnose-1). vector leg exact KNN (ivfflat 근사 제거). " - "passage 변종 공정 비교용. 운영 미사용. 미지정(false) = 기존 /ask 동작 동일." - ), - ), - x_source: Annotated[str | None, Header(alias="X-Source")] = None, - x_eval_case_id: Annotated[str | None, Header(alias="X-Eval-Case-Id")] = None, - x_eval_token: Annotated[str | None, Header(alias="X-Eval-Token")] = None, -): - """근거 기반 AI 답변 (Phase 3.5a). - - Phase 3.3 기반 + classifier parallel + refusal gate + grounding re-gate. - 실패 경로에서도 `results` 는 항상 반환. - - Phase 3.5 calibration trust boundary (fix2): - - X-Source / X-Eval-Case-Id 는 X-Eval-Token 이 EVAL_RUNNER_TOKEN 와 일치하는 - trusted internal eval runner 에서만 수용된다. - - 일반 client 의 X-Source=eval 시도는 무시되고 source='document_server' 로 강제. - - source != 'eval' 이면 eval_case_id 항상 None. - """ - # 오케스트레이션은 _run_ask 로 분리(라우터=입력+deps 해소만). 동작 무변경 (refactor: ask-orchestrator). - return await _run_ask( - q=q, - user=user, - session=session, - background_tasks=background_tasks, - limit=limit, - debug=debug, - backend=backend, - corpus_variant=corpus_variant, - exact_knn=exact_knn, - x_source=x_source, - x_eval_case_id=x_eval_case_id, - x_eval_token=x_eval_token, - ) - - -async def _run_ask( - q: str, - user: User, - session: AsyncSession, - background_tasks: BackgroundTasks, - limit: int, - debug: bool, - backend: str | None, - corpus_variant: str | None, - exact_knn: bool, - x_source: str | None, - x_eval_case_id: str | None, - x_eval_token: str | None, -) -> "AskResponse | JSONResponse": - """/ask 오케스트레이션 — 검색→evidence/classifier→refusal→synthesis→grounding/verifier→ - 7-tier 재게이트→telemetry. ask() 가 FastAPI deps 해소 후 호출. 동작은 기존 핸들러와 동일.""" - t_total = time.perf_counter() - defense_log: dict = {} # per-layer flag snapshot - source, eval_case_id = _resolve_eval_identity(x_source, x_eval_case_id, x_eval_token) - - # 1. 검색 파이프라인 (corpus_variant/exact_knn = EVAL-ONLY, 미지정 시 기존 동작 동일) - pr = await run_search( - session, q, mode="hybrid", limit=limit, - fusion=DEFAULT_FUSION, rerank=True, analyze=True, - corpus_variant=corpus_variant, exact_knn=exact_knn, - ) - - # 1.5. ask_includable=false 문서를 evidence 입력에서 제외 - # 검색 결과 자체는 유지 (사용자에게 보여줌), evidence만 필터 - if pr.results: - from sqlalchemy import select as sa_select - from models.document import Document as DocModel - ask_doc_ids = set() - excluded_ids = {r.id for r in pr.results} - rows = await session.execute( - sa_select(DocModel.id, DocModel.ask_includable).where( - DocModel.id.in_(excluded_ids) - ) - ) - for doc_id, includable in rows: - if includable is False: - ask_doc_ids.add(doc_id) - evidence_results = [r for r in pr.results if r.id not in ask_doc_ids] - else: - evidence_results = pr.results - - # 2. Evidence + Classifier 병렬 - t_ev = time.perf_counter() - evidence_task = asyncio.create_task(extract_evidence(q, evidence_results)) - - # classifier input: top 3 chunks meta + rerank scores - top_chunks = [ - { - "title": r.title or "", - "section": r.section_title or "", - "snippet": (r.snippet or "")[:200], - } - for r in pr.results[:3] - ] - rerank_scores_top = [ - r.rerank_score if r.rerank_score is not None else r.score - for r in pr.results[:3] - ] - classifier_task = asyncio.create_task( - classify(q, top_chunks, rerank_scores_top) - ) - - evidence, ev_skip = await evidence_task - ev_ms = (time.perf_counter() - t_ev) * 1000 - - # classifier await (timeout 보호 — classifier_service 내부에도 있지만 여기서 이중 보호) - # 2026-05-17: 6s outer wrapper 가 classifier_service.LLM_TIMEOUT_MS (30s) 를 override → 동시 부하 시 - # 거의 모든 classifier 호출 timeout → conservative_refuse(no_classifier) 경로. 15s 로 상향 — classifier - # 가 실제 작동하도록 (단, ask 전체 응답 시간 상한 영향: ev_ms + max(classifier_wait, evidence_extract) + - # synth_ms + verifier 누적). - # 2026-05-17 B-3: 15s 도 동시 부하 시 부족 (classifier_service LLM_TIMEOUT_MS 30s 와 misalign). - # 30s 로 align → classifier 동작 안정. ask 응답 latency 상한 ↑ 의도. - try: - classifier_result = await asyncio.wait_for(classifier_task, timeout=30.0) - except asyncio.CancelledError: - raise # 요청 취소는 전파 — broad except 가 삼키지 않게 명시 (R3) - except Exception: - classifier_result = ClassifierResult("timeout", None, [], [], 0.0) - - defense_log["classifier"] = { - "status": classifier_result.status, - "verdict": classifier_result.verdict, - "covered_aspects": classifier_result.covered_aspects, - "missing_aspects": classifier_result.missing_aspects, - "elapsed_ms": classifier_result.elapsed_ms, - } - - # 3. Refusal gate (multi-signal fusion) - all_rerank_scores = [ - e.rerank_score for e in evidence - ] if evidence else rerank_scores_top - decision = refusal_decide(all_rerank_scores, classifier_result) - - defense_log["score_gate"] = { - "max": max(all_rerank_scores) if all_rerank_scores else 0.0, - "agg_top3": sum(sorted(all_rerank_scores, reverse=True)[:3]), - } - defense_log["refusal"] = { - "refused": decision.refused, - "rule_triggered": decision.rule_triggered, - } - - if decision.refused: - total_ms = (time.perf_counter() - t_total) * 1000 - no_reason = "관련 근거를 찾지 못했습니다." - if not pr.results: - no_reason = "검색 결과가 없습니다." - logger.info( - "ask REFUSED query=%r rule=%s max_score=%.2f total=%.0f", - q[:80], decision.rule_triggered, - max(all_rerank_scores) if all_rerank_scores else 0.0, total_ms, - ) - # telemetry — search + ask_events 두 경로 동시 - background_tasks.add_task( - record_search_event, q, user.id, pr.results, "hybrid", - pr.confidence_signal, pr.analyzer_confidence, - ) - # input_snapshot (디버깅/재현용) - defense_log["input_snapshot"] = { - "query": q, - "top_chunks_preview": [ - {"title": c.get("title", ""), "snippet": c.get("snippet", "")[:100]} - for c in top_chunks[:3] - ], - "answer_preview": None, - } - background_tasks.add_task( - record_ask_event, - q, user.id, "insufficient", "skipped", None, - True, classifier_result.verdict, - max(all_rerank_scores) if all_rerank_scores else 0.0, - sum(sorted(all_rerank_scores, reverse=True)[:3]), - [], len(evidence), 0, - defense_log, int(total_ms), - # Phase E.1 측정 필드 - answer_length=0, - covered_aspects=classifier_result.covered_aspects or None, - missing_aspects=classifier_result.missing_aspects or None, - model_name=resolve_primary_model(), - prompt_version=ASK_PROMPT_VERSION, - # Phase 3.5 calibration - source=source, - eval_case_id=eval_case_id, - ) - debug_obj = None - if debug: - debug_obj = AskDebug( - timing_ms={**pr.timing_ms, "evidence_ms": ev_ms, "ask_total_ms": total_ms}, - search_notes=pr.notes, - confidence_signal=pr.confidence_signal, - evidence_candidate_count=len(evidence), - evidence_kept_count=len(evidence), - evidence_skip_reason=ev_skip, - synthesis_cache_hit=False, - hallucination_flags=[], - defense_layers=defense_log, - ) - return AskResponse( - results=pr.results, - ai_answer=None, - citations=[], - synthesis_status="skipped", - synthesis_ms=0.0, - confidence=None, - refused=True, - no_results_reason=no_reason, - query=q, - total=len(pr.results), - completeness="insufficient", - covered_aspects=classifier_result.covered_aspects or None, - missing_aspects=classifier_result.missing_aspects or None, - # refusal gate 단계에서는 backend 호출 자체가 일어나지 않음 → - # backend_used = None. backend_requested 는 호출자 의도 표시용. - backend_requested=backend, - backend_used=None, - debug=debug_obj, - ) - - # 4. Synthesis (backend dispatcher 적용 — PR-MacBook-RAG-Backend-1) - t_synth = time.perf_counter() - sr = await synthesize(q, evidence, debug=debug, backend=backend) - synth_ms = (time.perf_counter() - t_synth) * 1000 - - # 4.1. backend_unavailable → 503 fail-fast (자동 fallback 금지) - # 명시 opt-in backend (예: qwen-macbook) 가 비가용일 때만 발생. /ask wrapper 는 - # 절대 다른 backend 로 재시도하지 않음. 사용자가 backend 인자 제거 또는 wake 후 재시도. - if sr.status == "backend_unavailable": - backend_requested_val = backend or "gemma-macmini" - total_ms = (time.perf_counter() - t_total) * 1000 - logger.warning( - "ask backend_unavailable backend=%s query=%r total_ms=%.0f flags=%s", - backend_requested_val, q[:80], total_ms, - ",".join(sr.hallucination_flags) if sr.hallucination_flags else "-", - ) - # error_reason 명명 — macbook_unavailable 만 정착 (자동 fallback 부재). - error_reason = ( - "macbook_unavailable" - if backend_requested_val == "qwen-macbook" - else "backend_unavailable" - ) - # telemetry — search 만 기록 (ask_events 는 200 응답 path 전용) - background_tasks.add_task( - record_search_event, q, user.id, pr.results, "hybrid", - pr.confidence_signal, pr.analyzer_confidence, - ) - return JSONResponse( - status_code=503, - content={ - "error": "backend_unavailable", - "error_reason": error_reason, - "backend_requested": backend_requested_val, - "backend_used": None, - "query": q, - "detail": ( - "명시 선택한 backend 가 일시적으로 응답할 수 없습니다. " - "MacBook 깨우거나 backend 인자를 제거하고 (기본 Gemma) 다시 호출하세요." - ), - }, - ) - - # 5. Grounding check + Verifier (조건부 병렬) + re-gate (Phase 3.5b) - grounding = grounding_check(q, sr.answer or "", evidence) - - # verifier skip: grounding strong 2+ OR retrieval 자체가 망함 - grounding_only_strong = [ - f for f in grounding.strong_flags if not f.startswith("verifier_") - ] - max_rerank = max(all_rerank_scores, default=0.0) - if len(grounding_only_strong) >= 2 or max_rerank < 0.2: - verifier_result = VerifierResult("skipped", [], 0.0) - else: - verifier_task = asyncio.create_task( - verify(q, sr.answer or "", evidence) - ) - # 2026-05-17 B-3: 4s outer wait_for 가 verifier_service LLM_TIMEOUT_MS (10s) 를 override - # → classifier 와 동일 패턴 (search.py:522 가 6s→15s swap 했던 case). 10s 로 align. - try: - verifier_result = await asyncio.wait_for(verifier_task, timeout=10.0) - except asyncio.CancelledError: - raise # 요청 취소는 전파 — broad except 가 삼키지 않게 명시 (R3) - except Exception: - verifier_result = VerifierResult("timeout", [], 0.0) - - # Verifier contradictions → grounding flags 머지 (prefix 로 구분, severity 3단계) - for c in verifier_result.contradictions: - if c.severity == "strong": - grounding.strong_flags.append(f"verifier_{c.type}:{c.claim[:30]}") - elif c.severity == "medium": - grounding.weak_flags.append(f"verifier_{c.type}_medium:{c.claim[:30]}") - else: - grounding.weak_flags.append(f"verifier_{c.type}:{c.claim[:30]}") - - defense_log["evidence"] = { - "skip_reason": ev_skip, - "kept_count": len(evidence), - } - defense_log["grounding"] = { - "strong": grounding.strong_flags, - "weak": grounding.weak_flags, - } - defense_log["verifier"] = { - "status": verifier_result.status, - "contradictions_count": len(verifier_result.contradictions), - "strong_count": sum(1 for c in verifier_result.contradictions if c.severity == "strong"), - "medium_count": sum(1 for c in verifier_result.contradictions if c.severity == "medium"), - "elapsed_ms": verifier_result.elapsed_ms, - } - - # ── Re-gate: 7-tier completeness 결정 (Phase 3.5 B2 — Tier 4 신규 삽입, 재번호) ── - # 기존 6-tier (3.5b 4차 리뷰) + Tier 4(g_strong + v_strong_numeric + low_conf → refuse). - # 호환성: defense_layers["re_gate"] 의 string literal 들은 기존 그대로 유지. - # 신규 "refuse(grounding+verifier_numeric)" 만 추가. - completeness: Literal["full", "partial", "insufficient"] = "full" - covered_aspects = classifier_result.covered_aspects or None - missing_aspects = classifier_result.missing_aspects or None - confirmed_items: list[ConfirmedItem] | None = None - - # verifier/grounding strong 구분 - g_strong = [f for f in grounding.strong_flags if not f.startswith("verifier_")] - v_strong = [f for f in grounding.strong_flags if f.startswith("verifier_")] - v_medium = [f for f in grounding.weak_flags if f.startswith("verifier_") and "_medium:" in f] - has_direct_negation = any("direct_negation" in f for f in v_strong) - # Phase 3.5 B2: verifier strong flags 중 numeric_conflict 만 카운트. - # promote(VERIFIER_NUMERIC_PROMOTE=1) 활성 시 critical numeric_conflict 가 strong 으로 승격되며 - # 여기 카운트에 잡힘. promote off 면 항상 0 → Tier 4 활성 안 됨 (기존 동작 유지). - v_strong_numeric = sum( - 1 for f in v_strong if f.startswith("verifier_numeric_conflict") - ) - - # ── Tier 0 (Phase 3.5 fix3): synthesis 자체 실패 처리 ── - # LLM self-refuse, 메커니즘 실패(timeout/parse_failed/llm_error), answer 공백. - # 빈 답에 대해 grounding/verifier flag 가 0건이라 기존 체인이 "else clean" 으로 빠지며 - # completeness="full" 초기값이 보존되던 모순을 여기서 일관되게 차단. - # 과거 baseline(v1-400char) 에서 20(self-refuse)+4(timeout) = 24/223 (10.8%) 해당. - tier0_label = _detect_synthesis_failure(sr) - if tier0_label: - completeness = "insufficient" - sr.answer = None - sr.refused = True - sr.confidence = None - defense_log["re_gate"] = tier0_label - elif len(g_strong) >= 2: - # Tier 1: grounding strong 2+ → refuse - completeness = "insufficient" - sr.answer = None - sr.refused = True - sr.confidence = None - defense_log["re_gate"] = "refuse(grounding_2+strong)" - elif g_strong and has_direct_negation: - # Tier 2: grounding strong + verifier direct_negation → refuse - completeness = "insufficient" - sr.answer = None - sr.refused = True - sr.confidence = None - defense_log["re_gate"] = "refuse(grounding+direct_negation)" - elif g_strong and sr.confidence == "low" and max_rerank < 0.25: - # Tier 3: grounding strong 1 + (low confidence AND weak evidence) → refuse - completeness = "insufficient" - sr.answer = None - sr.refused = True - sr.confidence = None - defense_log["re_gate"] = "refuse(grounding+low_conf+weak_ev)" - elif g_strong and v_strong_numeric >= 1 and sr.confidence == "low": - # Tier 4 (B2 신규): grounding strong + verifier numeric_conflict strong + low conf → refuse. - # verifier strong 단독 refuse 금지 원칙 유지 — g_strong 교차 필수. - completeness = "insufficient" - sr.answer = None - sr.refused = True - sr.confidence = None - defense_log["re_gate"] = "refuse(grounding+verifier_numeric)" - elif g_strong or has_direct_negation: - # Tier 5 (기존 4): grounding strong 1 또는 verifier direct_negation 단독 → partial - completeness = "partial" - sr.confidence = "low" - defense_log["re_gate"] = "partial(strong_or_negation)" - elif v_medium: - # Tier 6 (기존 5): verifier medium 누적 → count 기반 confidence 하향 - medium_count = len(v_medium) - if medium_count >= 3: - sr.confidence = "low" - defense_log["re_gate"] = f"conf_low(medium_x{medium_count})" - elif medium_count == 2 and sr.confidence == "high": - sr.confidence = "medium" - defense_log["re_gate"] = "conf_cap_medium(medium_x2)" - else: - defense_log["re_gate"] = f"medium_x{medium_count}(no_action)" - elif grounding.weak_flags: - # Tier 7 (기존 6): weak → confidence 한 단계 하향 - if sr.confidence == "high": - sr.confidence = "medium" - defense_log["re_gate"] = "conf_lower(weak)" - else: - defense_log["re_gate"] = "clean" - - # Confidence cap from refusal gate (classifier 부재 시 conservative) - if decision.confidence_cap and sr.confidence: - conf_rank = {"low": 0, "medium": 1, "high": 2} - if conf_rank.get(sr.confidence, 0) > conf_rank.get(decision.confidence_cap, 2): - sr.confidence = decision.confidence_cap - - # Partial 이면 max confidence = medium - if completeness == "partial" and sr.confidence == "high": - sr.confidence = "medium" - - sr.hallucination_flags.extend( - [f"strong:{f}" for f in grounding.strong_flags] - + [f"weak:{f}" for f in grounding.weak_flags] - ) - - total_ms = (time.perf_counter() - t_total) * 1000 - - # 6. 응답 구성 - citations = _build_citations(evidence, sr.used_citations) - no_reason = _map_no_results_reason(pr, evidence, ev_skip, sr) - if completeness == "insufficient" and not no_reason: - # Tier 0 경로: synthesis self-refuse 는 LLM 이 준 사유가 가장 정확. - if sr.refused and sr.refuse_reason: - no_reason = sr.refuse_reason - else: - no_reason = "답변 검증에서 복수 오류 감지" - - logger.info( - "ask query=%r results=%d evidence=%d cite=%d synth=%s conf=%s completeness=%s " - "refused=%s grounding_strong=%d grounding_weak=%d ev_ms=%.0f synth_ms=%.0f total=%.0f", - q[:80], len(pr.results), len(evidence), len(citations), - sr.status, sr.confidence or "-", completeness, - sr.refused, len(grounding.strong_flags), len(grounding.weak_flags), - ev_ms, synth_ms, total_ms, - ) - - # 7. telemetry — search + ask_events 두 경로 동시 - background_tasks.add_task( - record_search_event, q, user.id, pr.results, "hybrid", - pr.confidence_signal, pr.analyzer_confidence, - ) - # input_snapshot (디버깅/재현용) - defense_log["input_snapshot"] = { - "query": q, - "top_chunks_preview": [ - {"title": (r.title or "")[:50], "snippet": (r.snippet or "")[:100]} - for r in pr.results[:3] - ], - "answer_preview": (sr.answer or "")[:200], - } - background_tasks.add_task( - record_ask_event, - q, user.id, completeness, sr.status, sr.confidence, - sr.refused, classifier_result.verdict, - max(all_rerank_scores) if all_rerank_scores else 0.0, - sum(sorted(all_rerank_scores, reverse=True)[:3]), - sr.hallucination_flags, len(evidence), len(citations), - defense_log, int(total_ms), - # Phase E.1 측정 필드 - answer_length=len(sr.answer or ""), - covered_aspects=covered_aspects, - missing_aspects=missing_aspects, - model_name=resolve_primary_model(), - prompt_version=ASK_PROMPT_VERSION, - # Phase 3.5 calibration - source=source, - eval_case_id=eval_case_id, - ) - - debug_obj = None - if debug: - timing = dict(pr.timing_ms) - timing["evidence_ms"] = ev_ms - timing["synthesis_ms"] = synth_ms - timing["ask_total_ms"] = total_ms - debug_obj = AskDebug( - timing_ms=timing, - search_notes=pr.notes, - query_analysis=pr.query_analysis, - confidence_signal=pr.confidence_signal, - evidence_candidate_count=len(evidence), - evidence_kept_count=len(evidence), - evidence_skip_reason=ev_skip, - synthesis_cache_hit=sr.cache_hit, - synthesis_raw_preview=sr.raw_preview, - hallucination_flags=sr.hallucination_flags, - defense_layers=defense_log, - ) - - # backend_used: synthesize 가 실제 호출한 backend (backend 인자 그대로 신뢰 OK — - # backend_unavailable 은 위 503 분기에서 이미 return 됨). - backend_used_val = backend or "gemma-macmini" - - return AskResponse( - results=pr.results, - ai_answer=sr.answer, - citations=citations, - synthesis_status=sr.status, - synthesis_ms=sr.elapsed_ms, - confidence=sr.confidence, - refused=sr.refused, - no_results_reason=no_reason, - query=q, - total=len(pr.results), - completeness=completeness, - covered_aspects=covered_aspects, - missing_aspects=missing_aspects, - confirmed_items=confirmed_items, - backend_requested=backend, - backend_used=backend_used_val, - debug=debug_obj, - ) - - -# ─── PR-DocSrv-Ask-ToolCalling-ReAct-1 ──────────────────────────────────── -# /api/search/ask/react — Qwen native tool calling 로 ReAct loop. -# 본 endpoint 는 qwen-macbook only (endpoint 자체가 implicit opt-in). -# MacBook unavailable 시 503 + error_reason=macbook_unavailable. Gemma 자동 fallback X. -# G0-2 counter semantics: max_tool_rounds=2, max LLM calls=3, search exec ≤ 2. -# G0-3 trace exposure: default response 의 debug_trace=None, debug=True 시만 채움. - - -class AskReactRequest(BaseModel): - query: str - debug: bool = False - - -class AskReactResponse(BaseModel): - final_answer: str - iterations: int - partial: bool - sources: list[dict] - debug_trace: list[dict] | None = None - - -@router.post("/ask/react", response_model=AskReactResponse) -async def ask_react( - payload: AskReactRequest, - user: Annotated[User, Depends(get_current_user)], - session: Annotated[AsyncSession, Depends(get_session)], -): - """ReAct loop endpoint (qwen-macbook only, no fallback). - - 호출자가 명시 opt-in 한 endpoint. MacBook 가 sleep / unreachable / 5xx 시 - HTTP 503 + body `{error_reason: "macbook_unavailable", backend: "qwen-macbook"}` - 를 반환한다. Gemma Mac mini 로 자동 fallback 하지 않는다 (정정 4 의 연장). - - request body: - - query: str (사용자 원본 질의) - - debug: bool (default false; true 시 응답 `debug_trace` 채움) - - response body (성공 200): - - final_answer: str (Qwen 종합문, partial 일 수 있음) - - iterations: int (실제 진행된 tool round 수) - - partial: bool (max_tool_rounds 도달 후 LLM content 비었을 때 true) - - sources: list[dict] (검색에서 모인 evidence 메타, id-기준 dedup) - - debug_trace: list[dict] | null (debug=true 시 round 별 trace) - """ - # 지연 import — 순환 의존성 회피 (react_loop 가 api.search.SearchResult 사용 안 함) - from services.llm.backends import BackendUnavailable, get_backend - from services.search.react_loop import agentic_ask_loop - - backend_inst = get_backend("qwen-macbook") - # PR-2 of DS AI routing policy: backend_inst may be RouterBackend (default) - # or QwenMacBookBackend (DS_BACKENDS_VIA_ROUTER=false rollback). Both - # implement generate_with_tools so the ReAct loop is identical. - assert hasattr(backend_inst, "generate_with_tools") - - try: - result = await agentic_ask_loop( - session, - payload.query, - backend=backend_inst, - debug=payload.debug, - ) - except BackendUnavailable as exc: - logger.warning( - "ask_react backend unavailable backend=%s reason=%s", - exc.backend_name, exc.reason, - ) - return JSONResponse( - status_code=503, - content={ - "error_reason": "macbook_unavailable", - "backend_requested": "qwen-macbook", - "backend_used": None, - "detail": exc.reason, - }, - ) - - return AskReactResponse( - final_answer=result.final_answer, - iterations=result.iterations, - partial=result.partial, - sources=result.sources, - debug_trace=result.debug_trace, - )