feat(api): Phase D.5 — POST /documents/{id}/analyze 문서 분석 엔드포인트

전문 15,000자 → Gemma 4 구조화 분석 (근거/해설/사례/요약 4층).

- MLX gate + 20초 timeout (gate 안쪽)
- 인메모리 캐시 TTL 30분, 키 = doc_id + updated_at(fallback: created_at)
- 층별 최소 50자 + 억지 채움 문구 제거
- summary 필수 (없으면 422)
- 에러: 404 text 없음 / 504 timeout / 502 llm / 422 parse

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 12:32:44 +09:00
parent 6bc52928b6
commit d9caf075e5
2 changed files with 239 additions and 1 deletions
@@ -1,10 +1,12 @@
 """문서 CRUD API"""

+import asyncio
 import logging
 import shutil
+import time
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Annotated
+from typing import Annotated, Literal
 from urllib.parse import quote

 from fastapi import APIRouter, Depends, Form, HTTPException, Query, UploadFile, status
@@ -13,6 +15,7 @@ from pydantic import BaseModel
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

+from ai.client import AIClient, _load_prompt, parse_json_response
 from core.auth import get_current_user
 from core.config import settings
 from core.database import get_session
@@ -20,6 +23,7 @@ from core.utils import file_hash
 from models.document import Document
 from models.queue import ProcessingQueue, enqueue_stage
 from models.user import User
+from services.search.llm_gate import get_mlx_gate

 router = APIRouter()

@@ -670,3 +674,207 @@ async def get_document_content(
        "content_length": len(raw_text),
        "truncated": truncated,
    }
+
+
+# ─── Phase D.5: document analysis (/{doc_id}/analyze) ───
+
+# Prompt template for the analyze endpoint, resolved once at import time.
+# Falls back to "" when prompts/document_analyze.txt (under the parent of this
+# module's directory) does not exist; the endpoint then answers HTTP 500.
+# NOTE(review): presumably _load_prompt reads from that same directory — confirm.
+ANALYZE_PROMPT = (
+    _load_prompt("document_analyze.txt")
+    if (Path(__file__).parent.parent / "prompts" / "document_analyze.txt").exists()
+    else ""
+)
+
+ANALYZE_TEXT_LIMIT = 15000  # max characters of extracted text put into the prompt
+ANALYZE_TIMEOUT_S = 20  # seconds allowed for the LLM call itself
+ANALYZE_CACHE_TTL_S = 1800  # 30 minutes
+ANALYZE_CACHE_MAXSIZE = 100  # entry count at which the oldest entry is evicted
+ANALYZE_LAYER_MIN_CHARS = 50  # shorter layers are treated as forced filler and dropped
+# Lower-case phrases that, at the start of a layer's content, mark it as
+# "not applicable" filler.
+_ANALYZE_LAYER_SKIP_MARKERS = (
+    "해당 없음", "정보 없음", "n/a", "na",
+    "없음", "없습니다", "not applicable",
+)
+
+# In-memory cache: key -> (response, time.time() when stored). Eviction is FIFO
+# by insertion order, not true LRU, since reads never reorder entries
+# (see the synthesis_service pattern).
+_analyze_cache: dict[str, tuple["AnalyzeResponse", float]] = {}
+
+
+class AnalysisLayer(BaseModel):
+    # One section of a document analysis as returned by the analyze endpoint.
+    # Documented with comments rather than a docstring so the generated schema
+    # description is left unchanged.
+
+    # Which of the four supported analysis layers this entry represents.
+    layer: Literal["evidence", "explanation", "examples", "summary"]
+    # Heading shown for the layer.
+    title: str
+    # Body text of the layer.
+    content: str
+
+
+class AnalyzeResponse(BaseModel):
+    # Response body of POST /{doc_id}/analyze. Instances are also what the
+    # in-memory analyze cache stores.
+
+    # Id of the analyzed document.
+    id: int
+    # Document title; may be None.
+    title: str | None
+    # Layers that survived validation, in the order the model produced them.
+    layers: list[AnalysisLayer]
+    # Handling time of this request in milliseconds.
+    elapsed_ms: float
+    # True when the extracted text exceeded ANALYZE_TEXT_LIMIT and was cut.
+    truncated: bool
+    # True when the layers were served from the in-memory cache.
+    cached: bool
+
+
+def _analyze_cache_key(doc_id: int, updated_at: datetime | None, created_at: datetime) -> str:
+    """Build the cache key ``<doc_id>:<ISO timestamp>``.
+
+    The timestamp is ``updated_at`` when it is set, otherwise ``created_at``,
+    so a document whose ``updated_at`` changes gets a fresh key.
+    """
+    stamp = updated_at if updated_at else created_at
+    return "{}:{}".format(doc_id, stamp.isoformat())
+
+
+def _analyze_cache_get(key: str) -> "AnalyzeResponse | None":
+    """Return the cached response for *key*, or None if absent or expired.
+
+    An entry older than ANALYZE_CACHE_TTL_S seconds is removed from the cache
+    as a side effect of the lookup.
+    """
+    try:
+        payload, stored_at = _analyze_cache[key]
+    except KeyError:
+        return None
+    age_s = time.time() - stored_at
+    if age_s <= ANALYZE_CACHE_TTL_S:
+        return payload
+    # Expired: drop it so it no longer counts toward the size cap.
+    _analyze_cache.pop(key, None)
+    return None
+
+
+def _analyze_cache_set(key: str, result: "AnalyzeResponse") -> None:
+    """Store *result* under *key*, evicting the oldest entry when the cache is full.
+
+    Eviction is FIFO by insertion order. The entry is stamped with the current
+    ``time.time()`` so the lookup side can apply the TTL.
+    """
+    # Assigning to an existing dict key keeps its original insertion slot, which
+    # would leave a just-refreshed entry first in line for eviction. Removing it
+    # first moves the refreshed entry to the newest position.
+    _analyze_cache.pop(key, None)
+    if len(_analyze_cache) >= ANALYZE_CACHE_MAXSIZE:
+        # The default covers an empty cache (only possible with a cap <= 0).
+        oldest = next(iter(_analyze_cache), None)
+        if oldest is not None:
+            _analyze_cache.pop(oldest, None)
+    _analyze_cache[key] = (result, time.time())
+
+
+def _is_skip_content(content: str) -> bool:
+    """Return True when *content* is empty or starts with a "not applicable" filler.
+
+    Matching is case-insensitive and ignores surrounding whitespace. Used to
+    drop layers the model padded out instead of omitting.
+    """
+    normalized = content.strip().lower()
+    if not normalized:
+        return True
+    for marker in _ANALYZE_LAYER_SKIP_MARKERS:
+        if not normalized.startswith(marker):
+            continue
+        following = normalized[len(marker):len(marker) + 1]
+        # A Latin marker such as "na" must not be the start of a longer word
+        # ("national", "nasa ..."), otherwise real content would be discarded.
+        # Korean markers keep plain prefix matching because particles attach
+        # directly to the word.
+        if marker.isascii() and following.isascii() and following.isalnum():
+            continue
+        return True
+    return False
+
+
+# Default heading per supported layer type, used when the model gives no usable title.
+_ANALYZE_LAYER_TITLES: dict[str, str] = {
+    "evidence": "근거",
+    "explanation": "해설",
+    "examples": "사례",
+    "summary": "요약",
+}
+
+
+def _build_analyze_prompt(title: str, doc_text: str) -> str:
+    """Fill the prompt template's title and text placeholders in a single pass."""
+    # The title is substituted only in the template fragments around the
+    # "{document_text}" slot, and the fragments are then joined with the text.
+    # Neither inserted value is re-scanned, so a placeholder literal inside the
+    # title or body cannot trigger a second substitution.
+    return doc_text.join(
+        fragment.replace("{document_title}", title)
+        for fragment in ANALYZE_PROMPT.split("{document_text}")
+    )
+
+
+def _collect_analysis_layers(raw_layers: list) -> list[AnalysisLayer]:
+    """Keep the first well-formed, substantive entry for each supported layer type."""
+    kept: dict[str, AnalysisLayer] = {}
+    for item in raw_layers:
+        if not isinstance(item, dict):
+            continue
+        layer_type = item.get("layer")
+        content = item.get("content")
+        # Model output is untrusted: skip entries whose fields have the wrong
+        # type instead of failing the whole request with an unhandled error.
+        if not isinstance(layer_type, str) or not isinstance(content, str):
+            continue
+        if layer_type not in _ANALYZE_LAYER_TITLES or layer_type in kept:
+            continue
+        content = content.strip()
+        if len(content) < ANALYZE_LAYER_MIN_CHARS or _is_skip_content(content):
+            continue
+        title = item.get("title")
+        if not isinstance(title, str) or not title:
+            title = _ANALYZE_LAYER_TITLES[layer_type]
+        kept[layer_type] = AnalysisLayer(
+            layer=layer_type,  # type: ignore[arg-type]
+            title=title,
+            content=content,
+        )
+    # dict preserves insertion order, so layers come back in model order.
+    return list(kept.values())
+
+
+@router.post("/{doc_id}/analyze", response_model=AnalyzeResponse)
+async def analyze_document(
+    doc_id: int,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+) -> AnalyzeResponse:
+    """Run a structured LLM analysis over a document's extracted text.
+
+    Returns up to four layers (evidence / explanation / examples / summary);
+    layers that do not apply are omitted, and a summary is required.
+
+    Raises:
+        HTTPException: 404 if the document is missing or has no extracted text,
+            500 if the prompt template is not installed, 504 on LLM timeout,
+            502 on any other LLM failure, 422 if the model output cannot be
+            parsed or lacks a usable summary.
+    """
+    t_start = time.perf_counter()
+
+    # 1. Load the document.
+    doc = await session.get(Document, doc_id)
+    if not doc:
+        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
+
+    # 2. Get the text to analyze.
+    raw_text = doc.extracted_text or ""
+    if not raw_text.strip():
+        raise HTTPException(status_code=404, detail="텍스트 추출 미완료")
+
+    truncated = len(raw_text) > ANALYZE_TEXT_LIMIT
+    doc_text = raw_text[:ANALYZE_TEXT_LIMIT]
+
+    # 3. Cache lookup (key: doc_id + updated_at, falling back to created_at).
+    cache_key = _analyze_cache_key(doc_id, doc.updated_at, doc.created_at)
+    cached = _analyze_cache_get(cache_key)
+    if cached is not None:
+        logger.info("document.analyze cache_hit doc_id=%s user=%s", doc_id, getattr(user, "username", "?"))
+        # Build a fresh response so elapsed_ms/cached describe this request.
+        return AnalyzeResponse(
+            id=cached.id,
+            title=cached.title,
+            layers=cached.layers,
+            elapsed_ms=(time.perf_counter() - t_start) * 1000,
+            truncated=cached.truncated,
+            cached=True,
+        )
+
+    # 4. Build the prompt.
+    if not ANALYZE_PROMPT:
+        raise HTTPException(status_code=500, detail="분석 프롬프트 미설치")
+    prompt = _build_analyze_prompt(doc.title or "", doc_text)
+
+    # 5. Call the LLM. The timeout sits inside the MLX gate, so time spent
+    #    waiting for the gate does not count against it.
+    ai_client = AIClient()
+    raw: str | None = None
+    try:
+        async with get_mlx_gate():
+            async with asyncio.timeout(ANALYZE_TIMEOUT_S):
+                raw = await ai_client._call_chat(ai_client.ai.primary, prompt)
+    except asyncio.TimeoutError as exc:
+        logger.warning("document.analyze timeout doc_id=%s", doc_id)
+        raise HTTPException(status_code=504, detail="분석 시간이 초과되었습니다") from exc
+    except Exception as exc:
+        logger.warning("document.analyze llm_error doc_id=%s err=%s", doc_id, type(exc).__name__)
+        raise HTTPException(status_code=502, detail="AI 서버 일시 오류") from exc
+    finally:
+        try:
+            await ai_client.close()
+        except Exception as close_exc:
+            # Cleanup is best-effort and must not mask the real outcome, but
+            # leave a trace instead of swallowing the failure silently.
+            logger.debug(
+                "document.analyze client_close_failed doc_id=%s err=%s",
+                doc_id,
+                type(close_exc).__name__,
+            )
+
+    # 6. Parse the JSON answer.
+    parsed = parse_json_response(raw or "")
+    if not isinstance(parsed, dict):
+        logger.warning("document.analyze parse_failed doc_id=%s raw_preview=%s", doc_id, (raw or "")[:200])
+        raise HTTPException(status_code=422, detail="분석 결과 파싱 실패")
+
+    # 7. Validate layers and drop forced filler.
+    raw_layers = parsed.get("layers") or []
+    if not isinstance(raw_layers, list):
+        raise HTTPException(status_code=422, detail="분석 결과 형식 오류")
+
+    valid_layers = _collect_analysis_layers(raw_layers)
+    seen_layers = {entry.layer for entry in valid_layers}
+    # An empty result is covered too: it cannot contain a summary.
+    if "summary" not in seen_layers:
+        logger.warning("document.analyze missing_summary doc_id=%s layers=%s", doc_id, seen_layers)
+        raise HTTPException(status_code=422, detail="분석 결과에 요약이 없습니다")
+
+    # 8. Build the response and cache it.
+    elapsed_ms = (time.perf_counter() - t_start) * 1000
+    result = AnalyzeResponse(
+        id=doc.id,
+        title=doc.title,
+        layers=valid_layers,
+        elapsed_ms=elapsed_ms,
+        truncated=truncated,
+        cached=False,
+    )
+    _analyze_cache_set(cache_key, result)
+
+    logger.info(
+        "document.analyze ok doc_id=%s user=%s layers=%d elapsed_ms=%.0f",
+        doc_id,
+        getattr(user, "username", "?"),
+        len(valid_layers),
+        elapsed_ms,
+    )
+    return result
@@ -0,0 +1,30 @@
+You are a document analyzer. Respond ONLY in JSON. No markdown wrapping, no explanation.
+
+## Task
+Given a document, produce a structured analysis with up to 4 layers.
+Skip any layer that does not apply. Always include "summary".
+
+## Output Schema
+{
+  "layers": [
+    {"layer": "evidence", "title": "근거", "content": "..."},
+    {"layer": "explanation", "title": "해설", "content": "..."},
+    {"layer": "examples", "title": "사례", "content": "..."},
+    {"layer": "summary", "title": "요약", "content": "..."}
+  ]
+}
+
+## Rules
+- Each content: 200~400 characters, in the same language as the document (Korean documents → Korean).
+- "evidence": Key factual claims or data points stated in the document. Skip for narrative/opinion documents.
+- "explanation": Why the facts matter, context, or interpretation. Skip for pure data/tables.
+- "examples": Concrete cases, scenarios, or instances explicitly mentioned. Skip if none exist.
+- "summary": Always present. 2-3 sentences capturing the document's core message.
+- Use ONLY information in the document. No outside knowledge.
+- If a layer does not apply, OMIT it entirely from the layers array. Do NOT write "해당 없음", "정보 없음", "N/A" — just skip.
+- Maximum 4 layers. Minimum 1 (summary).
+
+## Document
+Title: {document_title}
+Content:
+{document_text}