From 3517581eda81456e29788a881f501a46370aafe8 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Thu, 16 Apr 2026 09:37:41 +0900
Subject: [PATCH] =?UTF-8?q?feat(nanoclaude):=20Tier=202=20=EB=AC=B8?=
 =?UTF-8?q?=EC=84=9C=20=EC=A0=84=EB=AC=B8=20=EB=B6=84=EC=84=9D=20(analyze)?=
 =?UTF-8?q?=20+=20=EC=B8=B5=EB=B3=84=20=EB=8B=B5=EB=B3=80=20=EA=B5=AC?=
 =?UTF-8?q?=EC=A1=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- document_tool.analyze(): /content 엔드포인트로 전문 로드
- worker.py: render_mode=analyze → Gemma 스트리밍 분석
- 프롬프트: [근거] [해설] [사례] [요약] 층 구조
- _pre_route: 문서 ID(3755번, #3755) + 분석 키워드 감지
- registry: analyze operation 허용

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 nanoclaude/services/worker.py     | 49 +++++++++++++++++++++++++++++++
 nanoclaude/tools/document_tool.py | 42 ++++++++++++++++++++++++++
 nanoclaude/tools/registry.py      |  4 ++-
 3 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/nanoclaude/services/worker.py b/nanoclaude/services/worker.py
index 8c02416..46a3d74 100644
--- a/nanoclaude/services/worker.py
+++ b/nanoclaude/services/worker.py
@@ -180,6 +180,16 @@ def _pre_route(message: str) -> dict | None:
             folder = "INBOX"
         return {"action": "tools", "tool": "email", "operation": "search", "params": {"query": query, "days": days, "folder": folder}}
 
+    # Tier 2: 특정 문서 ID 명시 + 분석 키워드 → 전문 분석
+    import re
+    doc_id_match = re.search(r'(\d{3,6})\s*번', msg)
+    if not doc_id_match:
+        doc_id_match = re.search(r'#(\d{3,6})', msg)
+    analyze_signals = ["전체", "요약", "분석", "정리", "읽어", "전문", "자세히"]
+    if doc_id_match and any(s in msg for s in analyze_signals):
+        return {"action": "tools", "tool": "document", "operation": "analyze",
+                "params": {"doc_id": doc_id_match.group(1), "query": message}}
+
     # 문서 키워드 — 질문형/탐색형 점수 기반 분기
     doc_entry = any(k in msg for k in ["문서", "도큐먼트", "자료", "파일"])
     doc_action = any(k in msg for k in ["찾아", "검색", "확인", "알려", "설명", "뭐야"])
@@ -418,6 +428,45 @@ async def run(job: Job) -> None:
                         response = result.get("rendered_text", result.get("summary", "결과를 조회했습니다."))
                         collected.append(response)
                         await state_stream.push(job.id, "result", {"content": response})
+                    elif result.get("render_mode") == "analyze":
+                        # Tier 2: 문서 전문 → Gemma 분석
+                        doc_data = result["data"]
+                        doc_start_analyze = time()
+                        logger.info("Job %s document.analyze doc_id=%s query=%s", job.id, doc_data.get("doc_id"), doc_data.get("query", "")[:80])
+                        summary_text = doc_data.get("ai_summary") or "(요약 없음)"
+                        analyze_messages = [
+                            {"role": "system", "content": (
+                                "너는 산업안전 문서 분석 전문가야. "
+                                "아래 문서를 분석하여 질문에 답해. "
+                                "규칙: "
+                                "1) 문서에 있는 내용만 근거로 삼아라. "
+                                "2) 문서에 없는 내용은 추정하지 말고 '문서에 명시되지 않음'이라고 답해라. "
+                                "3) 답변은 다음 구조로: "
+                                "   [근거] 법령/기준 인용 (있으면) "
+                                "   [해설] 실무 적용 방법 "
+                                "   [사례] 유사 사고/재해 사례 (문서에 있으면) "
+                                "   [요약] 왜 중요한지 한 줄 "
+                                "4) 해당 층이 문서에 없으면 그 섹션은 생략해라. "
+                                "5) 순수 텍스트만 (마크다운/코드블록 금지)."
+                            )},
+                            {"role": "user", "content": (
+                                f"[문서: {doc_data['title']}]\n"
+                                f"유형: {doc_data.get('document_type', '미분류')}\n"
+                                f"요약: {summary_text}\n\n"
+                                f"{doc_data['content'][:12000]}\n\n"
+                                f"[질문]\n{doc_data['query']}"
+                            )},
+                        ]
+                        if job.callback == "synology":
+                            await send_to_synology("자료를 분석하고 있습니다...", raw=True)
+                        ok = await _stream_with_cancel(
+                            backend_registry.reasoner, "", job, collected, messages=analyze_messages
+                        )
+                        logger.info("Job %s document.analyze ok=%s elapsed=%.1fs", job.id, ok, time() - doc_start_analyze)
+                        if not ok:
+                            return
+                        if collected:
+                            await conversation_store.add(user_id, "assistant", "".join(collected))
                     else:
                         # 결과를 EXAONE에 전달하여 자연어로 정리 (평문 프롬프트 사용)
                         tool_json = json.dumps(result["data"], ensure_ascii=False)
diff --git a/nanoclaude/tools/document_tool.py b/nanoclaude/tools/document_tool.py
index aedac35..de6d468 100644
--- a/nanoclaude/tools/document_tool.py
+++ b/nanoclaude/tools/document_tool.py
@@ -14,6 +14,7 @@ TOOL_NAME = "document"
 MAX_RESULTS = 5
 SEARCH_TIMEOUT = 15.0
 ASK_TIMEOUT = 35.0
+ANALYZE_TIMEOUT = 60.0
 
 CONFIDENCE_LABELS = {"high": "높음", "medium": "보통", "low": "낮음"}
 
@@ -208,3 +209,44 @@ async def read(doc_id: str) -> dict:
     except Exception as e:
         logger.exception("Document read failed")
         return _make_result(False, "read", error=str(e))
+
+
+async def analyze(doc_id: str, query: str) -> dict:
+    """Load the full text of document `doc_id` from the /content endpoint and bundle it with `query` for Gemma analysis (Tier 2); returns a `_make_result` dict (render_mode="analyze" on success, error result otherwise)."""
+    if not settings.document_api_url:
+        return _make_result(False, "analyze", error="Document Server 설정이 없습니다.")
+    if not doc_id:
+        return _make_result(False, "analyze", error="문서 ID가 없습니다.")
+
+    try:
+        async with httpx.AsyncClient(timeout=ANALYZE_TIMEOUT) as client:
+            resp = await client.get(
+                f"{settings.document_api_url}/documents/{doc_id}/content",
+                headers=_headers(),
+            )
+            if resp.status_code == 404:  # unknown document: report it with the requested ID
+                return _make_result(False, "analyze", error=f"문서 {doc_id}를 찾을 수 없습니다.")
+            if resp.status_code != 200:  # any other non-OK status is a generic API error
+                return _make_result(False, "analyze", error=f"API 응답 오류 ({resp.status_code})")
+
+            doc = resp.json()
+
+        data = {
+            "doc_id": doc.get("id"),
+            "title": doc.get("title") or "",  # null-safe: a JSON null would otherwise render as "None" in the summary
+            "domain": doc.get("domain", ""),
+            "document_type": doc.get("document_type", ""),
+            "ai_summary": doc.get("ai_summary", ""),
+            "content": doc.get("content") or "",  # null-safe: the consumer slices this string, which fails on None
+            "truncated": doc.get("truncated", False),
+            "query": query,  # passed through unchanged so the renderer can pose the original question
+        }
+        return _make_result(
+            True, "analyze", data=data,
+            summary=f"문서 분석: {data['title']}",
+            render_mode="analyze",
+        )
+
+    except Exception as e:  # boundary handler: network/JSON/shape errors become an error result
+        logger.exception("Document analyze failed")
+        return _make_result(False, "analyze", error=str(e))
diff --git a/nanoclaude/tools/registry.py b/nanoclaude/tools/registry.py
index c83eb91..13e8f5d 100644
--- a/nanoclaude/tools/registry.py
+++ b/nanoclaude/tools/registry.py
@@ -20,7 +20,7 @@ ERROR_MESSAGES = {
 ALLOWED_OPS = {
     "calendar": {"today", "search", "create_draft", "create_confirmed"},
     "email": {"search", "read"},
-    "document": {"search", "search_full", "ask", "read"},
+    "document": {"search", "search_full", "ask", "read", "analyze"},
     "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify", "restart"},
 }
 
@@ -103,6 +103,8 @@ async def _exec_document(operation: str, params: dict) -> dict:
         return await document_tool.ask(params.get("query", ""))
     elif operation == "read":
         return await document_tool.read(params.get("doc_id", ""))
+    elif operation == "analyze":
+        return await document_tool.analyze(params.get("doc_id", ""), params.get("query", ""))
     return _error("document", operation, "미구현")