From 3517581eda81456e29788a881f501a46370aafe8 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Thu, 16 Apr 2026 09:37:41 +0900 Subject: [PATCH] =?UTF-8?q?feat(nanoclaude):=20Tier=202=20=EB=AC=B8?= =?UTF-8?q?=EC=84=9C=20=EC=A0=84=EB=AC=B8=20=EB=B6=84=EC=84=9D=20(analyze)?= =?UTF-8?q?=20+=20=EC=B8=B5=EB=B3=84=20=EB=8B=B5=EB=B3=80=20=EA=B5=AC?= =?UTF-8?q?=EC=A1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - document_tool.analyze(): /content 엔드포인트로 전문 로드 - worker.py: render_mode=analyze → Gemma 스트리밍 분석 - 프롬프트: [근거] [해설] [사례] [요약] 층 구조 - _pre_route: 문서 ID(3755번, #3755) + 분석 키워드 감지 - registry: analyze operation 허용 Co-Authored-By: Claude Opus 4.6 (1M context) --- nanoclaude/services/worker.py | 49 +++++++++++++++++++++++++++++++ nanoclaude/tools/document_tool.py | 42 ++++++++++++++++++++++++++ nanoclaude/tools/registry.py | 4 ++- 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/nanoclaude/services/worker.py b/nanoclaude/services/worker.py index 8c02416..46a3d74 100644 --- a/nanoclaude/services/worker.py +++ b/nanoclaude/services/worker.py @@ -180,6 +180,16 @@ def _pre_route(message: str) -> dict | None: folder = "INBOX" return {"action": "tools", "tool": "email", "operation": "search", "params": {"query": query, "days": days, "folder": folder}} + # Tier 2: 특정 문서 ID 명시 + 분석 키워드 → 전문 분석 + import re + doc_id_match = re.search(r'(\d{3,6})\s*번', msg) + if not doc_id_match: + doc_id_match = re.search(r'#(\d{3,6})', msg) + analyze_signals = ["전체", "요약", "분석", "정리", "읽어", "전문", "자세히"] + if doc_id_match and any(s in msg for s in analyze_signals): + return {"action": "tools", "tool": "document", "operation": "analyze", + "params": {"doc_id": doc_id_match.group(1), "query": message}} + # 문서 키워드 — 질문형/탐색형 점수 기반 분기 doc_entry = any(k in msg for k in ["문서", "도큐먼트", "자료", "파일"]) doc_action = any(k in msg for k in ["찾아", "검색", "확인", "알려", "설명", "뭐야"]) @@ -418,6 +428,45 @@ async def run(job: Job) -> None: response = result.get("rendered_text", result.get("summary", "결과를 조회했습니다.")) collected.append(response) await state_stream.push(job.id, "result", {"content": response}) + elif result.get("render_mode") == "analyze": + # Tier 2: 문서 전문 → Gemma 분석 + doc_data = result["data"] + doc_start_analyze = time() + logger.info("Job %s document.analyze doc_id=%s query=%s", job.id, doc_data.get("doc_id"), doc_data.get("query", "")[:80]) + summary_text = doc_data.get("ai_summary") or "(요약 없음)" + analyze_messages = [ + {"role": "system", "content": ( + "너는 산업안전 문서 분석 전문가야. " + "아래 문서를 분석하여 질문에 답해. " + "규칙: " + "1) 문서에 있는 내용만 근거로 삼아라. " + "2) 문서에 없는 내용은 추정하지 말고 '문서에 명시되지 않음'이라고 답해라. " + "3) 답변은 다음 구조로: " + " [근거] 법령/기준 인용 (있으면) " + " [해설] 실무 적용 방법 " + " [사례] 유사 사고/재해 사례 (문서에 있으면) " + " [요약] 왜 중요한지 한 줄 " + "4) 해당 층이 문서에 없으면 그 섹션은 생략해라. " + "5) 순수 텍스트만 (마크다운/코드블록 금지)." + )}, + {"role": "user", "content": ( + f"[문서: {doc_data['title']}]\n" + f"유형: {doc_data.get('document_type', '미분류')}\n" + f"요약: {summary_text}\n\n" + f"{doc_data['content'][:12000]}\n\n" + f"[질문]\n{doc_data['query']}" + )}, + ] + if job.callback == "synology": + await send_to_synology("자료를 분석하고 있습니다...", raw=True) + ok = await _stream_with_cancel( + backend_registry.reasoner, "", job, collected, messages=analyze_messages + ) + logger.info("Job %s document.analyze ok=%s elapsed=%.1fs", job.id, ok, time() - doc_start_analyze) + if not ok: + return + if collected: + await conversation_store.add(user_id, "assistant", "".join(collected)) else: # 결과를 EXAONE에 전달하여 자연어로 정리 (평문 프롬프트 사용) tool_json = json.dumps(result["data"], ensure_ascii=False) diff --git a/nanoclaude/tools/document_tool.py b/nanoclaude/tools/document_tool.py index aedac35..de6d468 100644 --- a/nanoclaude/tools/document_tool.py +++ b/nanoclaude/tools/document_tool.py @@ -14,6 +14,7 @@ TOOL_NAME = "document" MAX_RESULTS = 5 SEARCH_TIMEOUT = 15.0 ASK_TIMEOUT = 35.0 +ANALYZE_TIMEOUT = 60.0 CONFIDENCE_LABELS = {"high": "높음", "medium": "보통", "low": "낮음"} @@ -208,3 +209,44 @@ async def read(doc_id: str) -> dict: except Exception as e: logger.exception("Document read failed") return _make_result(False, "read", error=str(e)) + + +async def analyze(doc_id: str, query: str) -> dict: + """문서 전문 로드 → Gemma 분석용 데이터 준비 (Tier 2).""" + if not settings.document_api_url: + return _make_result(False, "analyze", error="Document Server 설정이 없습니다.") + if not doc_id: + return _make_result(False, "analyze", error="문서 ID가 없습니다.") + + try: + async with httpx.AsyncClient(timeout=ANALYZE_TIMEOUT) as client: + resp = await client.get( + f"{settings.document_api_url}/documents/{doc_id}/content", + headers=_headers(), + ) + if resp.status_code == 404: + return _make_result(False, "analyze", error=f"문서 {doc_id}를 찾을 수 없습니다.") + if resp.status_code != 200: + return _make_result(False, "analyze", error=f"API 응답 오류 ({resp.status_code})") + + doc = resp.json() + + data = { + "doc_id": doc.get("id"), + "title": doc.get("title", ""), + "domain": doc.get("domain", ""), + "document_type": doc.get("document_type", ""), + "ai_summary": doc.get("ai_summary", ""), + "content": doc.get("content", ""), + "truncated": doc.get("truncated", False), + "query": query, + } + return _make_result( + True, "analyze", data=data, + summary=f"문서 분석: {data['title']}", + render_mode="analyze", + ) + + except Exception as e: + logger.exception("Document analyze failed") + return _make_result(False, "analyze", error=str(e)) diff --git a/nanoclaude/tools/registry.py b/nanoclaude/tools/registry.py index c83eb91..13e8f5d 100644 --- a/nanoclaude/tools/registry.py +++ b/nanoclaude/tools/registry.py @@ -20,7 +20,7 @@ ERROR_MESSAGES = { ALLOWED_OPS = { "calendar": {"today", "search", "create_draft", "create_confirmed"}, "email": {"search", "read"}, - "document": {"search", "search_full", "ask", "read"}, + "document": {"search", "search_full", "ask", "read", "analyze"}, "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify", "restart"}, } @@ -103,6 +103,8 @@ async def _exec_document(operation: str, params: dict) -> dict: return await document_tool.ask(params.get("query", "")) elif operation == "read": return await document_tool.read(params.get("doc_id", "")) + elif operation == "analyze": + return await document_tool.analyze(params.get("doc_id", ""), params.get("query", "")) return _error("document", operation, "미구현")