36f9fad0af
- document_tool.py: ask() (/api/search/ask 35초 timeout, citation 포맷, refused 시 검색 결과 fallback) + search_full() (rerank+analyze 포함) - registry.py: ALLOWED_OPS에 ask, search_full 추가 - worker.py: 질문/탐색 점수 기반 분기 (ask 강신호 2개 이상), document.ask 전용 35초 timeout, render_mode="final" 시 EXAONE 스킵 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
211 lines
8.1 KiB
Python
211 lines
8.1 KiB
Python
"""Document 도구 — Document Server REST API (read-only)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from config import settings
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Value stored under the "tool" key of every result envelope.
TOOL_NAME: str = "document"

# Upper bound on the number of search hits kept from a search response.
MAX_RESULTS: int = 5

# Values passed as the httpx client ``timeout=`` argument (seconds):
# one for the search/read style calls, a longer one for the ask call.
SEARCH_TIMEOUT: float = 15.0
ASK_TIMEOUT: float = 35.0

# Display labels (Korean) keyed by the confidence level string.
CONFIDENCE_LABELS: dict[str, str] = {
    "high": "높음",
    "medium": "보통",
    "low": "낮음",
}
|
|
|
|
|
|
def _make_result(ok: bool, operation: str, data=None, summary: str = "", error: str | None = None, **extra) -> dict:
    """Build the uniform result envelope returned by the document operations.

    Falsy ``data`` (``None``, empty containers, ...) is normalised to an empty
    list. Extra keyword arguments are merged in last, so they may add keys or
    override the standard ones.
    """
    payload = data if data else []
    return {
        "ok": ok,
        "tool": TOOL_NAME,
        "operation": operation,
        "data": payload,
        "summary": summary,
        "error": error,
        **extra,
    }
|
|
|
|
|
|
def _headers() -> dict:
    """Return the bearer-token Authorization header, or no headers when no token is configured."""
    token = settings.document_api_token
    if not token:
        return {}
    return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
async def search(query: str) -> dict:
    """Run a basic hybrid search against the Document Server.

    Sends ``GET {document_api_url}/search/`` with ``mode=hybrid`` and keeps at
    most ``MAX_RESULTS`` hits.

    Args:
        query: Search text, sent as the ``q`` query parameter.

    Returns:
        A result envelope built by ``_make_result``. On success ``data`` is a
        list of ``{"id", "title", "domain", "preview"}`` dicts, with the
        preview capped at 200 characters. On failure ``ok`` is False and
        ``error`` describes the problem; exceptions are caught and reported
        through the envelope rather than propagated.
    """
    if not settings.document_api_url:
        return _make_result(False, "search", error="Document Server 설정이 없습니다.")

    try:
        async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client:
            resp = await client.get(
                f"{settings.document_api_url}/search/",
                params={"q": query, "mode": "hybrid"},
                headers=_headers(),
            )
        if resp.status_code != 200:
            return _make_result(False, "search", error=f"API 응답 오류 ({resp.status_code})")

        results = resp.json()
        if isinstance(results, dict):
            # The payload may wrap the hits under "results" or "data"; a null
            # or missing value falls through to the next candidate.
            results = results.get("results") or results.get("data") or []
        results = (results or [])[:MAX_RESULTS]

        items = [
            {
                "id": doc.get("id", ""),
                "title": doc.get("title", "(제목 없음)"),
                "domain": doc.get("domain", ""),
                # `or` (not a .get default) so a null/empty "content" falls
                # back to "snippet" instead of rendering the text "None".
                "preview": str(doc.get("content") or doc.get("snippet") or "")[:200],
            }
            for doc in results
        ]

        summary = f"'{query}' 검색 결과 {len(items)}건"
        return _make_result(True, "search", data=items, summary=summary)

    except httpx.TimeoutException:
        # Handled separately, as ask() does: the exception text of a timeout
        # can be empty, which would yield a blank error message.
        logger.warning("Document search timeout")
        return _make_result(False, "search", error="검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.")
    except Exception as e:
        logger.exception("Document search failed")
        # Never report an empty error string for a failed call.
        return _make_result(False, "search", error=str(e) or type(e).__name__)
|
|
|
|
|
|
async def search_full(query: str) -> dict:
    """Run a hybrid search with reranking and analysis enabled.

    Sends ``GET {document_api_url}/search/`` with ``mode=hybrid``,
    ``rerank=true``, ``analyze=true`` and ``limit=10``, then keeps at most
    ``MAX_RESULTS`` hits.

    Args:
        query: Search text, sent as the ``q`` query parameter.

    Returns:
        A result envelope built by ``_make_result``. On success ``data`` is a
        list of ``{"id", "title", "domain", "score", "summary"}`` dicts (the
        per-document summary is capped at 150 characters) and the envelope
        summary includes the response's ``confidence_signal`` formatted as a
        percentage. On failure ``ok`` is False and ``error`` is set;
        exceptions are caught and reported through the envelope.
    """
    if not settings.document_api_url:
        return _make_result(False, "search_full", error="Document Server 설정이 없습니다.")

    try:
        async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client:
            resp = await client.get(
                f"{settings.document_api_url}/search/",
                params={"q": query, "mode": "hybrid", "rerank": "true", "analyze": "true", "limit": "10"},
                headers=_headers(),
            )
        if resp.status_code != 200:
            return _make_result(False, "search_full", error=f"API 응답 오류 ({resp.status_code})")

        body = resp.json()
        # `or` defaults also cover keys that are present but null; a null
        # confidence would otherwise make the percent format below raise.
        results = (body.get("results") or [])[:MAX_RESULTS]
        confidence = body.get("confidence_signal") or 0

        items = [
            {
                "id": doc.get("id", ""),
                "title": doc.get("title", "(제목 없음)"),
                "domain": doc.get("domain", ""),
                "score": doc.get("score", 0),
                # A null ai_summary becomes "" rather than the text "None".
                "summary": str(doc.get("ai_summary") or "")[:150],
            }
            for doc in results
        ]

        summary = f"'{query}' 검색 결과 {len(items)}건 (신뢰도: {confidence:.0%})"
        return _make_result(True, "search_full", data=items, summary=summary)

    except httpx.TimeoutException:
        # Handled separately, as ask() does: the exception text of a timeout
        # can be empty, which would yield a blank error message.
        logger.warning("Document search_full timeout")
        return _make_result(False, "search_full", error="검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.")
    except Exception as e:
        logger.exception("Document search_full failed")
        # Never report an empty error string for a failed call.
        return _make_result(False, "search_full", error=str(e) or type(e).__name__)
|
|
|
|
|
|
async def ask(query: str) -> dict:
    """Ask the Document Server for an evidence-grounded AI answer.

    Sends ``GET {document_api_url}/search/ask`` and renders the response as
    display-ready text. When the server refused, returned no answer, or marked
    the answer ``insufficient``, the rendered text falls back to the reason
    plus a list of related documents.

    Args:
        query: Question text, sent as the ``q`` query parameter.

    Returns:
        A result envelope built by ``_make_result``. On success ``data`` holds
        up to ``MAX_RESULTS`` raw search hits and the envelope additionally
        carries ``rendered_text``, ``render_mode="final"``, ``citations`` and
        ``confidence`` (empty list / ``None`` in the fallback case). On
        failure ``ok`` is False and ``error`` is set; exceptions are caught
        and reported through the envelope.
    """
    if not settings.document_api_url:
        return _make_result(False, "ask", error="Document Server 설정이 없습니다.")

    try:
        async with httpx.AsyncClient(timeout=ASK_TIMEOUT) as client:
            resp = await client.get(
                f"{settings.document_api_url}/search/ask",
                params={"q": query, "limit": "10"},
                headers=_headers(),
            )
        if resp.status_code != 200:
            return _make_result(False, "ask", error=f"API 응답 오류 ({resp.status_code})")

        body = resp.json()

        ai_answer = body.get("ai_answer")
        refused = body.get("refused", False)
        completeness = body.get("completeness", "insufficient")
        confidence = body.get("confidence")
        # `or []` also covers keys that are present but null.
        citations = body.get("citations") or []
        top_results = (body.get("results") or [])[:MAX_RESULTS]

        # Refused or insufficient -> fall back to listing the search results.
        if refused or not ai_answer or completeness == "insufficient":
            reason = body.get("no_results_reason") or "관련 근거를 찾지 못했습니다."
            lines = [reason, ""]
            if top_results:
                lines.append("[관련 문서]")
                for doc in top_results:
                    title = doc.get("title", "(제목 없음)")
                    # A null score would make the percent format raise.
                    score = doc.get("score") or 0
                    lines.append(f"- {title} (유사도: {score:.0%})")

            rendered = "\n".join(lines)
            return _make_result(
                True, "ask", data=top_results, summary=rendered,
                rendered_text=rendered, render_mode="final",
                citations=[], confidence=None,
            )

        # Normal answer -> format header, answer body, sources and caveat.
        conf_label = CONFIDENCE_LABELS.get(confidence, "")
        lines = [f"[AI 답변] (신뢰도: {conf_label})" if conf_label else "[AI 답변]"]
        lines.append(ai_answer)

        if citations:
            lines.append("")
            lines.append("[출처]")
            for c in citations:
                n = c.get("n", "")
                title = c.get("title", "")
                # A null relevance would make the percent format raise.
                rel = c.get("relevance") or 0
                lines.append(f"[{n}] {title} (관련도: {rel:.0%})")

        if completeness == "partial":
            lines.append("")
            lines.append("(일부 내용만 확인 가능합니다)")

        rendered = "\n".join(lines)
        citation_meta = [
            {"n": c.get("n"), "title": c.get("title"), "relevance": c.get("relevance"), "doc_id": c.get("doc_id")}
            for c in citations
        ]

        return _make_result(
            True, "ask", data=top_results, summary=rendered,
            rendered_text=rendered, render_mode="final",
            citations=citation_meta, confidence=confidence,
        )

    except httpx.TimeoutException:
        logger.warning("Document ask timeout")
        return _make_result(False, "ask", error="답변 생성 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.")
    except Exception as e:
        logger.exception("Document ask failed")
        # Never report an empty error string for a failed call.
        return _make_result(False, "ask", error=str(e) or type(e).__name__)
|
|
|
|
|
|
async def read(doc_id: str) -> dict:
    """Fetch a single document from the Document Server.

    Sends ``GET {document_api_url}/documents/{doc_id}``.

    Args:
        doc_id: Identifier of the document; percent-encoded before being
            placed in the URL path.

    Returns:
        A result envelope built by ``_make_result``. On success ``data`` is a
        ``{"id", "title", "domain", "content"}`` dict with the content capped
        at 2000 characters. A 404 response, any other non-200 status, a
        timeout, or any other exception yields ``ok=False`` with ``error``
        set; exceptions are caught and reported through the envelope.
    """
    from urllib.parse import quote

    if not settings.document_api_url:
        return _make_result(False, "read", error="Document Server 설정이 없습니다.")

    # Encode the id as one path segment so characters such as "/", "?" or "#"
    # cannot re-target the request to a different endpoint.
    safe_id = quote(str(doc_id), safe="")

    try:
        async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client:
            resp = await client.get(
                f"{settings.document_api_url}/documents/{safe_id}",
                headers=_headers(),
            )
        if resp.status_code == 404:
            return _make_result(False, "read", error=f"문서 {doc_id}를 찾을 수 없습니다.")
        if resp.status_code != 200:
            return _make_result(False, "read", error=f"API 응답 오류 ({resp.status_code})")

        doc = resp.json()
        data = {
            "id": doc.get("id", ""),
            "title": doc.get("title", ""),
            "domain": doc.get("domain", ""),
            # `or` (not a .get default) so a null/empty "content" falls back
            # to "markdown_content" instead of rendering the text "None".
            "content": str(doc.get("content") or doc.get("markdown_content") or "")[:2000],
        }
        return _make_result(True, "read", data=data, summary=f"문서: {data['title']}")

    except httpx.TimeoutException:
        # Handled separately, as ask() does: the exception text of a timeout
        # can be empty, which would yield a blank error message.
        logger.warning("Document read timeout")
        return _make_result(False, "read", error="문서 조회 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.")
    except Exception as e:
        logger.exception("Document read failed")
        # Never report an empty error string for a failed call.
        return _make_result(False, "read", error=str(e) or type(e).__name__)
|