"""Document 도구 — Document Server REST API (read-only).""" from __future__ import annotations import logging import httpx from config import settings logger = logging.getLogger(__name__) TOOL_NAME = "document" MAX_RESULTS = 5 SEARCH_TIMEOUT = 15.0 ASK_TIMEOUT = 35.0 ANALYZE_TIMEOUT = 60.0 CONFIDENCE_LABELS = {"high": "높음", "medium": "보통", "low": "낮음"} def _make_result(ok: bool, operation: str, data=None, summary: str = "", error: str | None = None, **extra) -> dict: result = {"ok": ok, "tool": TOOL_NAME, "operation": operation, "data": data or [], "summary": summary, "error": error} result.update(extra) return result def _headers() -> dict: return {"Authorization": f"Bearer {settings.document_api_token}"} if settings.document_api_token else {} async def search(query: str) -> dict: """문서 하이브리드 검색 (basic).""" if not settings.document_api_url: return _make_result(False, "search", error="Document Server 설정이 없습니다.") try: async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client: resp = await client.get( f"{settings.document_api_url}/search/", params={"q": query, "mode": "hybrid"}, headers=_headers(), ) if resp.status_code != 200: return _make_result(False, "search", error=f"API 응답 오류 ({resp.status_code})") results = resp.json() if isinstance(results, dict): results = results.get("results", results.get("data", [])) results = results[:MAX_RESULTS] items = [] for doc in results: items.append({ "id": doc.get("id", ""), "title": doc.get("title", "(제목 없음)"), "domain": doc.get("domain", ""), "preview": str(doc.get("content", doc.get("snippet", "")))[:200], }) summary = f"'{query}' 검색 결과 {len(items)}건" return _make_result(True, "search", data=items, summary=summary) except Exception as e: logger.exception("Document search failed") return _make_result(False, "search", error=str(e)) async def search_full(query: str) -> dict: """문서 하이브리드 검색 (rerank + analyze 포함).""" if not settings.document_api_url: return _make_result(False, "search_full", error="Document Server 설정이 없습니다.") try: async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client: resp = await client.get( f"{settings.document_api_url}/search/", params={"q": query, "mode": "hybrid", "rerank": "true", "analyze": "true", "limit": "10"}, headers=_headers(), ) if resp.status_code != 200: return _make_result(False, "search_full", error=f"API 응답 오류 ({resp.status_code})") body = resp.json() results = body.get("results", [])[:MAX_RESULTS] confidence = body.get("confidence_signal", 0) items = [] for doc in results: items.append({ "id": doc.get("id", ""), "title": doc.get("title", "(제목 없음)"), "domain": doc.get("domain", ""), "score": doc.get("score", 0), "summary": str(doc.get("ai_summary", ""))[:150], }) summary = f"'{query}' 검색 결과 {len(items)}건 (신뢰도: {confidence:.0%})" return _make_result(True, "search_full", data=items, summary=summary) except Exception as e: logger.exception("Document search_full failed") return _make_result(False, "search_full", error=str(e)) async def ask(query: str) -> dict: """문서 기반 AI 답변 (evidence-grounded synthesis).""" if not settings.document_api_url: return _make_result(False, "ask", error="Document Server 설정이 없습니다.") try: async with httpx.AsyncClient(timeout=ASK_TIMEOUT) as client: resp = await client.get( f"{settings.document_api_url}/search/ask", params={"q": query, "limit": "10"}, headers=_headers(), ) if resp.status_code != 200: return _make_result(False, "ask", error=f"API 응답 오류 ({resp.status_code})") body = resp.json() ai_answer = body.get("ai_answer") refused = body.get("refused", False) completeness = body.get("completeness", "insufficient") confidence = body.get("confidence") citations = body.get("citations", []) results = body.get("results", []) # refused 또는 insufficient → 검색 결과 fallback if refused or not ai_answer or completeness == "insufficient": reason = body.get("no_results_reason", "관련 근거를 찾지 못했습니다.") lines = [reason, ""] if results: lines.append("[관련 문서]") for doc in results[:5]: title = doc.get("title", "(제목 없음)") score = doc.get("score", 0) lines.append(f"- {title} (유사도: {score:.0%})") rendered = "\n".join(lines) return _make_result( True, "ask", data=results[:5], summary=rendered, rendered_text=rendered, render_mode="final", citations=[], confidence=None, ) # 정상 답변 → 포맷팅 conf_label = CONFIDENCE_LABELS.get(confidence, "") lines = [f"[AI 답변] (신뢰도: {conf_label})" if conf_label else "[AI 답변]"] lines.append(ai_answer) if citations: lines.append("") lines.append("[출처]") for c in citations: n = c.get("n", "") title = c.get("title", "") rel = c.get("relevance", 0) lines.append(f"[{n}] {title} (관련도: {rel:.0%})") if completeness == "partial": lines.append("") lines.append("(일부 내용만 확인 가능합니다)") rendered = "\n".join(lines) citation_meta = [{"n": c.get("n"), "title": c.get("title"), "relevance": c.get("relevance"), "doc_id": c.get("doc_id")} for c in citations] return _make_result( True, "ask", data=results[:5], summary=rendered, rendered_text=rendered, render_mode="final", citations=citation_meta, confidence=confidence, ) except httpx.TimeoutException: logger.warning("Document ask timeout") return _make_result(False, "ask", error="답변 생성 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.") except Exception as e: logger.exception("Document ask failed") return _make_result(False, "ask", error=str(e)) async def read(doc_id: str) -> dict: """문서 내용 조회.""" if not settings.document_api_url: return _make_result(False, "read", error="Document Server 설정이 없습니다.") try: async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client: resp = await client.get( f"{settings.document_api_url}/documents/{doc_id}", headers=_headers(), ) if resp.status_code == 404: return _make_result(False, "read", error=f"문서 {doc_id}를 찾을 수 없습니다.") if resp.status_code != 200: return _make_result(False, "read", error=f"API 응답 오류 ({resp.status_code})") doc = resp.json() data = { "id": doc.get("id", ""), "title": doc.get("title", ""), "domain": doc.get("domain", ""), "content": str(doc.get("content", doc.get("markdown_content", "")))[:2000], } return _make_result(True, "read", data=data, summary=f"문서: {data['title']}") except Exception as e: logger.exception("Document read failed") return _make_result(False, "read", error=str(e)) async def analyze(doc_id: str, query: str) -> dict: """문서 전문 로드 → Gemma 분석용 데이터 준비 (Tier 2).""" if not settings.document_api_url: return _make_result(False, "analyze", error="Document Server 설정이 없습니다.") if not doc_id: return _make_result(False, "analyze", error="문서 ID가 없습니다.") try: async with httpx.AsyncClient(timeout=ANALYZE_TIMEOUT) as client: resp = await client.get( f"{settings.document_api_url}/documents/{doc_id}/content", headers=_headers(), ) if resp.status_code == 404: return _make_result(False, "analyze", error=f"문서 {doc_id}를 찾을 수 없습니다.") if resp.status_code != 200: return _make_result(False, "analyze", error=f"API 응답 오류 ({resp.status_code})") doc = resp.json() data = { "doc_id": doc.get("id"), "title": doc.get("title", ""), "domain": doc.get("domain", ""), "document_type": doc.get("document_type", ""), "ai_summary": doc.get("ai_summary", ""), "content": doc.get("content", ""), "truncated": doc.get("truncated", False), "query": query, } return _make_result( True, "analyze", data=data, summary=f"문서 분석: {data['title']}", render_mode="analyze", ) except Exception as e: logger.exception("Document analyze failed") return _make_result(False, "analyze", error=str(e))