feat(nanoclaude): Tier 2 문서 전문 분석 (analyze) + 층별 답변 구조

- document_tool.analyze(): /content 엔드포인트로 전문 로드
- worker.py: render_mode=analyze → Gemma 스트리밍 분석
- 프롬프트: [근거] [해설] [사례] [요약] 층 구조
- _pre_route: 문서 ID(3755번, #3755) + 분석 키워드 감지
- registry: analyze operation 허용

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 09:37:41 +09:00
parent b50aea3f69
commit 3517581eda
3 changed files with 94 additions and 1 deletions
@@ -180,6 +180,16 @@ def _pre_route(message: str) -> dict | None:
            folder = "INBOX"
        return {"action": "tools", "tool": "email", "operation": "search", "params": {"query": query, "days": days, "folder": folder}}
    # Tier 2: 특정 문서 ID 명시 + 분석 키워드 → 전문 분석
    import re
    doc_id_match = re.search(r'(\d{3,6})\s*번', msg)
    if not doc_id_match:
        doc_id_match = re.search(r'#(\d{3,6})', msg)
    analyze_signals = ["전체", "요약", "분석", "정리", "읽어", "전문", "자세히"]
    if doc_id_match and any(s in msg for s in analyze_signals):
        return {"action": "tools", "tool": "document", "operation": "analyze",
                "params": {"doc_id": doc_id_match.group(1), "query": message}}
    # 문서 키워드 — 질문형/탐색형 점수 기반 분기
    doc_entry = any(k in msg for k in ["문서", "도큐먼트", "자료", "파일"])
    doc_action = any(k in msg for k in ["찾아", "검색", "확인", "알려", "설명", "뭐야"])
@@ -418,6 +428,45 @@ async def run(job: Job) -> None:
                        response = result.get("rendered_text", result.get("summary", "결과를 조회했습니다."))
                        collected.append(response)
                        await state_stream.push(job.id, "result", {"content": response})
                    elif result.get("render_mode") == "analyze":
                        # Tier 2: 문서 전문 → Gemma 분석
                        doc_data = result["data"]
                        doc_start_analyze = time()
                        logger.info("Job %s document.analyze doc_id=%s query=%s", job.id, doc_data.get("doc_id"), doc_data.get("query", "")[:80])
                        summary_text = doc_data.get("ai_summary") or "(요약 없음)"
                        analyze_messages = [
                            {"role": "system", "content": (
                                "너는 산업안전 문서 분석 전문가야. "
                                "아래 문서를 분석하여 질문에 답해. "
                                "규칙: "
                                "1) 문서에 있는 내용만 근거로 삼아라. "
                                "2) 문서에 없는 내용은 추정하지 말고 '문서에 명시되지 않음'이라고 답해라. "
                                "3) 답변은 다음 구조로: "
                                "   [근거] 법령/기준 인용 (있으면) "
                                "   [해설] 실무 적용 방법 "
                                "   [사례] 유사 사고/재해 사례 (문서에 있으면) "
                                "   [요약] 왜 중요한지 한 줄 "
                                "4) 해당 층이 문서에 없으면 그 섹션은 생략해라. "
                                "5) 순수 텍스트만 (마크다운/코드블록 금지)."
                            )},
                            {"role": "user", "content": (
                                f"[문서: {doc_data['title']}]\n"
                                f"유형: {doc_data.get('document_type', '미분류')}\n"
                                f"요약: {summary_text}\n\n"
                                f"{doc_data['content'][:12000]}\n\n"
                                f"[질문]\n{doc_data['query']}"
                            )},
                        ]
                        if job.callback == "synology":
                            await send_to_synology("자료를 분석하고 있습니다...", raw=True)
                        ok = await _stream_with_cancel(
                            backend_registry.reasoner, "", job, collected, messages=analyze_messages
                        )
                        logger.info("Job %s document.analyze ok=%s elapsed=%.1fs", job.id, ok, time() - doc_start_analyze)
                        if not ok:
                            return
                        if collected:
                            await conversation_store.add(user_id, "assistant", "".join(collected))
                    else:
                        # 결과를 EXAONE에 전달하여 자연어로 정리 (평문 프롬프트 사용)
                        tool_json = json.dumps(result["data"], ensure_ascii=False)
@@ -14,6 +14,7 @@ TOOL_NAME = "document"
 # Tuning constants for the document tool.
 # NOTE(review): MAX_RESULTS, SEARCH_TIMEOUT and ASK_TIMEOUT are not used in the
 # visible part of this file; the timeouts are presumably seconds — confirm.
 MAX_RESULTS = 5
 SEARCH_TIMEOUT = 15.0
 ASK_TIMEOUT = 35.0
 # Passed as `timeout=` to httpx.AsyncClient when analyze() fetches a document's
 # full content.
 ANALYZE_TIMEOUT = 60.0
 # Maps the confidence keys "high"/"medium"/"low" to Korean label strings.
 CONFIDENCE_LABELS = {"high": "높음", "medium": "보통", "low": "낮음"}
@@ -208,3 +209,44 @@ async def read(doc_id: str) -> dict:
    except Exception as e:
        logger.exception("Document read failed")
        return _make_result(False, "read", error=str(e))
 async def analyze(doc_id: str, query: str) -> dict:
    """Load a document's full text and package it for Tier 2 analysis.

    Issues a GET to ``{document_api_url}/documents/{doc_id}/content`` and
    copies the fields the analysis prompt needs into a plain dict.

    Args:
        doc_id: Identifier of the document to load. Surrounding whitespace is
            stripped and the value is percent-encoded before being placed in
            the URL path.
        query: The user's question; passed through unchanged in the payload
            (``None`` is normalised to an empty string).

    Returns:
        Whatever ``_make_result`` builds. On success it is called with
        ``render_mode="analyze"`` and a ``data`` dict holding ``doc_id``,
        ``title``, ``domain``, ``document_type``, ``ai_summary``, ``content``,
        ``truncated`` and ``query``. On any failure (missing configuration,
        empty id, non-200 response, or an exception) it is called with
        ``False`` and an ``error`` message. This function does not raise.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    if not settings.document_api_url:
        return _make_result(False, "analyze", error="Document Server 설정이 없습니다.")
    # Falsy ids (None, "", 0) are rejected exactly as before; whitespace-only
    # ids are now rejected too instead of producing a malformed URL.
    doc_id = str(doc_id).strip() if doc_id else ""
    if not doc_id:
        return _make_result(False, "analyze", error="문서 ID가 없습니다.")
    # Encode the id as a single path segment so characters such as "/" or
    # ".." cannot redirect the request to a different endpoint.
    doc_path = quote(doc_id, safe="")
    try:
        async with httpx.AsyncClient(timeout=ANALYZE_TIMEOUT) as client:
            resp = await client.get(
                f"{settings.document_api_url}/documents/{doc_path}/content",
                headers=_headers(),
            )
            if resp.status_code == 404:
                return _make_result(False, "analyze", error=f"문서 {doc_id}를 찾을 수 없습니다.")
            if resp.status_code != 200:
                return _make_result(False, "analyze", error=f"API 응답 오류 ({resp.status_code})")
            doc = resp.json()
        # `dict.get(key, default)` still yields None for an explicit JSON
        # null, and the consumer of this payload slices `content` and `query`
        # as strings, so text fields are normalised with `or ""`.
        returned_id = doc.get("id")
        data = {
            # Fall back to the requested id when the response omits it.
            "doc_id": doc_id if returned_id is None else returned_id,
            "title": doc.get("title") or "",
            "domain": doc.get("domain") or "",
            "document_type": doc.get("document_type") or "",
            "ai_summary": doc.get("ai_summary") or "",
            "content": doc.get("content") or "",
            "truncated": doc.get("truncated", False),
            "query": query or "",
        }
        return _make_result(
            True, "analyze", data=data,
            summary=f"문서 분석: {data['title']}",
            render_mode="analyze",
        )
    except Exception as e:
        # Boundary handler: the tool reports failure through its result dict.
        logger.exception("Document analyze failed")
        # Some exceptions stringify to "", so fall back to the class name to
        # keep the error message non-empty.
        return _make_result(False, "analyze", error=str(e) or type(e).__name__)
@@ -20,7 +20,7 @@ ERROR_MESSAGES = {
 ALLOWED_OPS = {
    "calendar": {"today", "search", "create_draft", "create_confirmed"},
    "email": {"search", "read"},
-    "document": {"search", "search_full", "ask", "read"},
+    "document": {"search", "search_full", "ask", "read", "analyze"},
    "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify", "restart"},
 }
@@ -103,6 +103,8 @@ async def _exec_document(operation: str, params: dict) -> dict:
        return await document_tool.ask(params.get("query", ""))
    elif operation == "read":
        return await document_tool.read(params.get("doc_id", ""))
    elif operation == "analyze":
        return await document_tool.analyze(params.get("doc_id", ""), params.get("query", ""))
    return _error("document", operation, "미구현")