feat(api): Phase D.5 — POST /documents/{id}/analyze 문서 분석 엔드포인트

전문 15,000자 → Gemma 4 구조화 분석 (근거/해설/사례/요약 4층).
- MLX gate + 20초 timeout (gate 안쪽)
- 인메모리 캐시 TTL 30분, 키 = doc_id + updated_at(fallback: created_at)
- 층별 최소 50자 + 억지 채움 문구 제거
- summary 필수 (없으면 422)
- 에러: 404 text 없음 / 504 timeout / 502 llm / 422 parse

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-16 12:32:44 +09:00
parent 6bc52928b6
commit d9caf075e5
2 changed files with 239 additions and 1 deletions
+209 -1
View File
@@ -1,10 +1,12 @@
"""문서 CRUD API"""
import asyncio
import logging
import shutil
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated
from typing import Annotated, Literal
from urllib.parse import quote
from fastapi import APIRouter, Depends, Form, HTTPException, Query, UploadFile, status
@@ -13,6 +15,7 @@ from pydantic import BaseModel
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from ai.client import AIClient, _load_prompt, parse_json_response
from core.auth import get_current_user
from core.config import settings
from core.database import get_session
@@ -20,6 +23,7 @@ from core.utils import file_hash
from models.document import Document
from models.queue import ProcessingQueue, enqueue_stage
from models.user import User
from services.search.llm_gate import get_mlx_gate
router = APIRouter()
@@ -670,3 +674,207 @@ async def get_document_content(
"content_length": len(raw_text),
"truncated": truncated,
}
# ─── Phase D.5: document analysis (/{doc_id}/analyze) ───
# Prompt template for the analyze endpoint, resolved once at import time.
# If the template file is missing the constant is left as an empty string
# instead of failing the import; analyze_document then answers HTTP 500.
# NOTE(review): assumes _load_prompt reads from this same ../prompts
# directory — confirm against ai.client.
ANALYZE_PROMPT = (
    _load_prompt("document_analyze.txt")
    if (Path(__file__).parent.parent / "prompts" / "document_analyze.txt").exists()
    else ""
)
# Maximum number of characters of extracted text sent to the LLM.
ANALYZE_TEXT_LIMIT = 15000 # chars
# Timeout (seconds) applied to the LLM call.
ANALYZE_TIMEOUT_S = 20
ANALYZE_CACHE_TTL_S = 1800 # 30 minutes
# Maximum number of entries kept in _analyze_cache.
ANALYZE_CACHE_MAXSIZE = 100
ANALYZE_LAYER_MIN_CHARS = 50 # shorter layer content is treated as forced filler and dropped
# Lower-case "not applicable" style phrases; consumed by _is_skip_content to
# detect layers the model filled in only to avoid omitting them.
_ANALYZE_LAYER_SKIP_MARKERS = (
    "해당 없음", "정보 없음", "n/a", "na",
    "없음", "없습니다", "not applicable",
)
# In-memory cache. Eviction is FIFO by insertion order (not a true LRU);
# the original note points to synthesis_service for the same pattern.
# Maps cache key -> (response, time.time() value when it was stored).
_analyze_cache: dict[str, tuple["AnalyzeResponse", float]] = {}
# One layer of the structured analysis returned by the analyze endpoint.
class AnalysisLayer(BaseModel):
    # Layer kind; restricted to the four supported values.
    layer: Literal["evidence", "explanation", "examples", "summary"]
    # Display title for the layer.
    title: str
    # Body text of the layer.
    content: str
# Response body of POST /{doc_id}/analyze.
class AnalyzeResponse(BaseModel):
    # Document id.
    id: int
    # Document title (may be absent).
    title: str | None
    # Layers that survived validation.
    layers: list[AnalysisLayer]
    # Handler wall-clock time in milliseconds.
    elapsed_ms: float
    # True when the source text exceeded ANALYZE_TEXT_LIMIT and was cut.
    truncated: bool
    # True when the result was served from the in-memory cache.
    cached: bool
def _analyze_cache_key(doc_id: int, updated_at: datetime | None, created_at: datetime) -> str:
"""캐시 키 = doc_id + updated_at (없으면 created_at)"""
ts = updated_at or created_at
return f"{doc_id}:{ts.isoformat()}"
def _analyze_cache_get(key: str) -> "AnalyzeResponse | None":
    """Return the cached analysis for ``key``, or ``None`` on miss/expiry.

    An entry older than ``ANALYZE_CACHE_TTL_S`` seconds is removed from the
    cache as a side effect and reported as a miss.
    """
    hit = _analyze_cache.get(key)
    if hit is None:
        return None

    payload, saved_at = hit
    age_s = time.time() - saved_at
    if age_s > ANALYZE_CACHE_TTL_S:
        # Expired: drop it so it no longer occupies a cache slot.
        _analyze_cache.pop(key, None)
        return None
    return payload
def _analyze_cache_set(key: str, result: "AnalyzeResponse") -> None:
    """Store ``result`` under ``key`` together with the current time.

    When the cache is full and ``key`` is new, the earliest-inserted entry is
    evicted first (FIFO). Overwriting an existing key never evicts.
    """
    is_new_key = key not in _analyze_cache
    cache_full = len(_analyze_cache) >= ANALYZE_CACHE_MAXSIZE
    if is_new_key and cache_full and _analyze_cache:
        # dicts iterate in insertion order, so the first key is the oldest.
        first_key = next(iter(_analyze_cache))
        _analyze_cache.pop(first_key, None)
    _analyze_cache[key] = (result, time.time())
def _is_skip_content(content: str) -> bool:
    """Tell whether ``content`` is "not applicable" filler rather than analysis.

    The text is stripped and lower-cased, then compared against
    ``_ANALYZE_LAYER_SKIP_MARKERS``. Empty text counts as filler. A marker
    matches when the text equals it, or starts with it and the marker is not
    merely the prefix of a longer ASCII word.

    Args:
        content: Layer text produced by the LLM.

    Returns:
        True when the layer should be dropped as forced filler.
    """
    text = content.strip().lower()
    if not text:
        return True
    for marker in _ANALYZE_LAYER_SKIP_MARKERS:
        if not text.startswith(marker):
            continue
        following = text[len(marker):len(marker) + 1]
        if not following:
            # Exact match.
            return True
        # A plain prefix test would flag "NASA ..." or "National ..." because
        # of the "na" marker. Only an ASCII letter/digit right after the
        # marker means "same word continues"; Korean suffixes attached to a
        # marker (e.g. "없음입니다") must still match.
        if not (following.isascii() and following.isalnum()):
            return True
    return False
# Default display titles per layer, used when the LLM omits "title" or
# returns something that is not a usable string.
_ANALYZE_LAYER_TITLES = {
    "evidence": "근거",
    "explanation": "해설",
    "examples": "사례",
    "summary": "요약",
}


def _analyze_collect_layers(raw_layers: list) -> "list[AnalysisLayer]":
    """Turn raw LLM layer items into validated ``AnalysisLayer`` objects.

    LLM output is untrusted, so every field is type-checked before use; a
    malformed item is skipped instead of raising. An item is kept only when
    its layer kind is known and not yet accepted, and its content is a string
    of at least ``ANALYZE_LAYER_MIN_CHARS`` characters that is not
    "not applicable" filler.

    Args:
        raw_layers: The ``layers`` list parsed from the LLM response.

    Returns:
        Accepted layers in their original order, at most one per kind.
    """
    accepted: list[AnalysisLayer] = []
    seen_kinds: set[str] = set()
    for item in raw_layers:
        if not isinstance(item, dict):
            continue
        layer_type = item.get("layer")
        # The str check also protects the dict lookup from unhashable values.
        if not isinstance(layer_type, str) or layer_type not in _ANALYZE_LAYER_TITLES:
            continue
        if layer_type in seen_kinds:
            continue
        raw_content = item.get("content")
        if not isinstance(raw_content, str):
            continue
        content = raw_content.strip()
        if len(content) < ANALYZE_LAYER_MIN_CHARS:
            continue
        if _is_skip_content(content):
            continue
        raw_title = item.get("title")
        has_title = isinstance(raw_title, str) and bool(raw_title.strip())
        accepted.append(
            AnalysisLayer(
                layer=layer_type,  # type: ignore[arg-type]
                title=raw_title if has_title else _ANALYZE_LAYER_TITLES[layer_type],
                content=content,
            )
        )
        # Marked only on acceptance, so a later valid item of the same kind
        # can still replace an earlier rejected one.
        seen_kinds.add(layer_type)
    return accepted


@router.post("/{doc_id}/analyze", response_model=AnalyzeResponse)
async def analyze_document(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
) -> AnalyzeResponse:
    """Run a structured LLM analysis over a document's extracted text.

    The result has up to four layers (evidence / explanation / examples /
    summary); layers that do not apply are omitted, but a summary is
    required. Results are cached in memory, keyed by document id and its
    updated/created timestamp.

    Raises:
        HTTPException: 404 when the document is missing or has no extracted
            text; 500 when the prompt template is not installed; 504 when the
            LLM call times out; 502 on any other LLM failure; 422 when the
            LLM output cannot be parsed or contains no valid summary.
    """
    t_start = time.perf_counter()

    # 1. Load the document.
    doc = await session.get(Document, doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")

    # 2. Obtain the text and cut it to the analysis limit.
    raw_text = doc.extracted_text or ""
    if not raw_text.strip():
        raise HTTPException(status_code=404, detail="텍스트 추출 미완료")
    truncated = len(raw_text) > ANALYZE_TEXT_LIMIT
    doc_text = raw_text[:ANALYZE_TEXT_LIMIT]

    # 3. Cache lookup (key: doc_id + updated_at/created_at).
    cache_key = _analyze_cache_key(doc_id, doc.updated_at, doc.created_at)
    cached = _analyze_cache_get(cache_key)
    if cached is not None:
        logger.info("document.analyze cache_hit doc_id=%s user=%s", doc_id, getattr(user, "username", "?"))
        # Rebuild the response so elapsed_ms/cached describe this request.
        return AnalyzeResponse(
            id=cached.id,
            title=cached.title,
            layers=cached.layers,
            elapsed_ms=(time.perf_counter() - t_start) * 1000,
            truncated=cached.truncated,
            cached=True,
        )

    # 4. Build the prompt.
    if not ANALYZE_PROMPT:
        raise HTTPException(status_code=500, detail="분석 프롬프트 미설치")
    prompt = ANALYZE_PROMPT.replace("{document_title}", doc.title or "").replace(
        "{document_text}", doc_text
    )

    # 5. LLM call. The timeout sits inside the MLX gate, so it starts only
    #    once the gate has been entered.
    ai_client = AIClient()
    raw: str | None = None
    try:
        async with get_mlx_gate():
            async with asyncio.timeout(ANALYZE_TIMEOUT_S):
                raw = await ai_client._call_chat(ai_client.ai.primary, prompt)
    except asyncio.TimeoutError as exc:
        logger.warning("document.analyze timeout doc_id=%s", doc_id)
        raise HTTPException(status_code=504, detail="분석 시간이 초과되었습니다") from exc
    except Exception as exc:
        logger.warning("document.analyze llm_error doc_id=%s err=%s", doc_id, type(exc).__name__)
        raise HTTPException(status_code=502, detail="AI 서버 일시 오류") from exc
    finally:
        # Best-effort cleanup: a close failure must not mask the real outcome,
        # but it is logged instead of being silently discarded.
        try:
            await ai_client.close()
        except Exception:
            logger.debug("document.analyze client_close_failed doc_id=%s", doc_id, exc_info=True)

    # 6. Parse the JSON response.
    parsed = parse_json_response(raw or "")
    if not isinstance(parsed, dict):
        logger.warning("document.analyze parse_failed doc_id=%s raw_preview=%s", doc_id, (raw or "")[:200])
        raise HTTPException(status_code=422, detail="분석 결과 파싱 실패")

    # 7. Validate layers and drop forced filler.
    raw_layers = parsed.get("layers") or []
    if not isinstance(raw_layers, list):
        raise HTTPException(status_code=422, detail="분석 결과 형식 오류")
    valid_layers = _analyze_collect_layers(raw_layers)
    seen_layers = {layer.layer for layer in valid_layers}
    if not valid_layers or "summary" not in seen_layers:
        logger.warning("document.analyze missing_summary doc_id=%s layers=%s", doc_id, seen_layers)
        raise HTTPException(status_code=422, detail="분석 결과에 요약이 없습니다")

    # 8. Build the response and store it in the cache.
    elapsed_ms = (time.perf_counter() - t_start) * 1000
    result = AnalyzeResponse(
        id=doc.id,
        title=doc.title,
        layers=valid_layers,
        elapsed_ms=elapsed_ms,
        truncated=truncated,
        cached=False,
    )
    _analyze_cache_set(cache_key, result)
    logger.info(
        "document.analyze ok doc_id=%s user=%s layers=%d elapsed_ms=%.0f",
        doc_id,
        getattr(user, "username", "?"),
        len(valid_layers),
        elapsed_ms,
    )
    return result
+30
View File
@@ -0,0 +1,30 @@
You are a document analyzer. Respond ONLY in JSON. No markdown wrapping, no explanation.
## Task
Given a document, produce a structured analysis with up to 4 layers.
Skip any layer that does not apply. Always include "summary".
## Output Schema
{
"layers": [
{"layer": "evidence", "title": "근거", "content": "..."},
{"layer": "explanation", "title": "해설", "content": "..."},
{"layer": "examples", "title": "사례", "content": "..."},
{"layer": "summary", "title": "요약", "content": "..."}
]
}
## Rules
- Each content: 200~400 characters, in the same language as the document (Korean documents → Korean).
- "evidence": Key factual claims or data points stated in the document. Skip for narrative/opinion documents.
- "explanation": Why the facts matter, context, or interpretation. Skip for pure data/tables.
- "examples": Concrete cases, scenarios, or instances explicitly mentioned. Skip if none exist.
- "summary": Always present. 2-3 sentences capturing the document's core message.
- Use ONLY information in the document. No outside knowledge.
- If a layer does not apply, OMIT it entirely from the layers array. Do NOT write "해당 없음", "정보 없음", "N/A" — just skip.
- Maximum 4 layers. Minimum 1 (summary).
## Document
Title: {document_title}
Content:
{document_text}