feat(api): Phase D.5 — POST /documents/{id}/analyze 문서 분석 엔드포인트
전문 15,000자 → Gemma 4 구조화 분석 (근거/해설/사례/요약 4층).
- MLX gate + 20초 timeout (gate 안쪽)
- 인메모리 캐시 TTL 30분, 키 = doc_id + updated_at(fallback: created_at)
- 층별 최소 50자 + 억지 채움 문구 제거
- summary 필수 (없으면 422)
- 에러: 404 text 없음 / 504 timeout / 502 llm / 422 parse

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+209
-1
@@ -1,10 +1,12 @@
|
||||
"""문서 CRUD API"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import shutil
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
from typing import Annotated, Literal
|
||||
from urllib.parse import quote
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, UploadFile, status
|
||||
@@ -13,6 +15,7 @@ from pydantic import BaseModel
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ai.client import AIClient, _load_prompt, parse_json_response
|
||||
from core.auth import get_current_user
|
||||
from core.config import settings
|
||||
from core.database import get_session
|
||||
@@ -20,6 +23,7 @@ from core.utils import file_hash
|
||||
from models.document import Document
|
||||
from models.queue import ProcessingQueue, enqueue_stage
|
||||
from models.user import User
|
||||
from services.search.llm_gate import get_mlx_gate
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -670,3 +674,207 @@ async def get_document_content(
|
||||
"content_length": len(raw_text),
|
||||
"truncated": truncated,
|
||||
}
|
||||
|
||||
|
||||
# ─── Phase D.5: document analysis (/{doc_id}/analyze) ───

# Prompt template, resolved once at import time. It stays an empty string when
# ../prompts/document_analyze.txt (relative to this module) does not exist.
if (Path(__file__).parent.parent / "prompts" / "document_analyze.txt").exists():
    ANALYZE_PROMPT = _load_prompt("document_analyze.txt")
else:
    ANALYZE_PROMPT = ""

ANALYZE_TEXT_LIMIT = 15000  # max characters of document text sent to the model
ANALYZE_TIMEOUT_S = 20  # seconds allowed for the LLM call
ANALYZE_CACHE_TTL_S = 30 * 60  # 30 minutes
ANALYZE_CACHE_MAXSIZE = 100  # max number of cached analyses
ANALYZE_LAYER_MIN_CHARS = 50  # shorter layers are treated as filler and dropped

# Lower-case phrases that mark a layer as "nothing to say" filler.
_ANALYZE_LAYER_SKIP_MARKERS = (
    "해당 없음",
    "정보 없음",
    "n/a",
    "na",
    "없음",
    "없습니다",
    "not applicable",
)

# In-memory cache with FIFO eviction: key -> (response, time.time() at store).
# NOTE(review): said to follow the synthesis_service pattern — confirm.
_analyze_cache: dict[str, tuple["AnalyzeResponse", float]] = {}
|
||||
|
||||
|
||||
class AnalysisLayer(BaseModel):
    # One section ("layer") of a structured document analysis.

    # Which of the four supported sections this entry is.
    layer: Literal["evidence", "explanation", "examples", "summary"]
    # Display heading for the section.
    title: str
    # Body text of the section.
    content: str
|
||||
|
||||
|
||||
class AnalyzeResponse(BaseModel):
    # Response body of POST /{doc_id}/analyze.

    # Id of the analysed document.
    id: int
    # Document title; may be absent.
    title: str | None
    # Layers that survived validation.
    layers: list[AnalysisLayer]
    # Wall time spent handling the request, in milliseconds.
    elapsed_ms: float
    # True when the document text exceeded the analysis limit and was cut.
    truncated: bool
    # True when the layers were served from the in-memory cache.
    cached: bool
|
||||
|
||||
|
||||
def _analyze_cache_key(doc_id: int, updated_at: datetime | None, created_at: datetime) -> str:
|
||||
"""캐시 키 = doc_id + updated_at (없으면 created_at)"""
|
||||
ts = updated_at or created_at
|
||||
return f"{doc_id}:{ts.isoformat()}"
|
||||
|
||||
|
||||
def _analyze_cache_get(key: str) -> "AnalyzeResponse | None":
    """Look up a cached analysis.

    Returns the stored response when ``key`` is present and was stored no more
    than ``ANALYZE_CACHE_TTL_S`` seconds ago. An expired entry is removed from
    the cache and ``None`` is returned, as it is for an unknown key.
    """
    try:
        cached_result, saved_at = _analyze_cache[key]
    except KeyError:
        return None

    age_s = time.time() - saved_at
    if age_s <= ANALYZE_CACHE_TTL_S:
        return cached_result

    # Expired: drop it so the stale entry stops occupying a slot.
    _analyze_cache.pop(key, None)
    return None
|
||||
|
||||
|
||||
def _analyze_cache_set(key: str, result: "AnalyzeResponse") -> None:
    """Store ``result`` under ``key`` together with the current time.

    When the cache already holds ``ANALYZE_CACHE_MAXSIZE`` entries and ``key``
    is new, the first-inserted entry is evicted first (FIFO). Overwriting an
    existing key evicts nothing.
    """
    is_new_key = key not in _analyze_cache
    if is_new_key and len(_analyze_cache) >= ANALYZE_CACHE_MAXSIZE:
        # dicts iterate in insertion order, so the first key is the oldest.
        # Keys are strings, which makes None a safe "cache is empty" sentinel.
        oldest_key = next(iter(_analyze_cache), None)
        if oldest_key is not None:
            _analyze_cache.pop(oldest_key, None)
    _analyze_cache[key] = (result, time.time())
|
||||
|
||||
|
||||
def _is_skip_content(content: str) -> bool:
|
||||
"""'해당 없음' 계열 문구 판정 (억지 채움 제거용)."""
|
||||
stripped = content.strip().lower()
|
||||
if not stripped:
|
||||
return True
|
||||
for marker in _ANALYZE_LAYER_SKIP_MARKERS:
|
||||
if stripped == marker or stripped.startswith(marker):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@router.post("/{doc_id}/analyze", response_model=AnalyzeResponse)
async def analyze_document(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
) -> AnalyzeResponse:
    """Produce a structured LLM analysis of a document's extracted text.

    The model is asked for up to four layers (evidence / explanation /
    examples / summary). Layers that are too short, look like filler, are
    duplicated, or are malformed are dropped; a summary layer is mandatory.
    Successful results are cached in memory, keyed by document id and its
    updated/created timestamp.

    Args:
        doc_id: Primary key of the document to analyse.
        user: Authenticated caller; only used for logging here.
        session: Database session used to load the document.

    Returns:
        The analysis, with ``cached=True`` when served from the cache.

    Raises:
        HTTPException: 404 when the document does not exist or has no
            extracted text; 500 when the prompt template is not installed;
            504 when the LLM call times out; 502 on any other LLM failure;
            422 when the model output cannot be parsed, has the wrong shape,
            or lacks a usable summary layer.
    """
    t_start = time.perf_counter()

    # 1. Load the document.
    doc = await session.get(Document, doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")

    # 2. Require extracted text and cap it to the analysis limit.
    raw_text = doc.extracted_text or ""
    if not raw_text.strip():
        raise HTTPException(status_code=404, detail="텍스트 추출 미완료")

    truncated = len(raw_text) > ANALYZE_TEXT_LIMIT
    doc_text = raw_text[:ANALYZE_TEXT_LIMIT]

    # 3. Serve from cache when possible (key: doc_id + updated_at/created_at).
    cache_key = _analyze_cache_key(doc_id, doc.updated_at, doc.created_at)
    cached = _analyze_cache_get(cache_key)
    if cached is not None:
        logger.info("document.analyze cache_hit doc_id=%s user=%s", doc_id, getattr(user, "username", "?"))
        return AnalyzeResponse(
            id=cached.id,
            title=cached.title,
            layers=cached.layers,
            elapsed_ms=(time.perf_counter() - t_start) * 1000,
            truncated=cached.truncated,
            cached=True,
        )

    # 4. Build the prompt.
    if not ANALYZE_PROMPT:
        raise HTTPException(status_code=500, detail="분석 프롬프트 미설치")
    doc_title = doc.title or ""
    # Fill both placeholders in a single pass over the template: split on the
    # text placeholder first, so a title or body that happens to contain a
    # placeholder literal is never substituted a second time.
    prompt = doc_text.join(
        piece.replace("{document_title}", doc_title)
        for piece in ANALYZE_PROMPT.split("{document_text}")
    )

    # 5. Call the LLM. The timeout sits inside the MLX gate, so time spent
    #    waiting for the gate does not count against it.
    ai_client = AIClient()
    raw: str | None = None
    try:
        async with get_mlx_gate():
            async with asyncio.timeout(ANALYZE_TIMEOUT_S):
                raw = await ai_client._call_chat(ai_client.ai.primary, prompt)
    except asyncio.TimeoutError as exc:
        logger.warning("document.analyze timeout doc_id=%s", doc_id)
        raise HTTPException(status_code=504, detail="분석 시간이 초과되었습니다") from exc
    except Exception as exc:
        logger.warning("document.analyze llm_error doc_id=%s err=%s", doc_id, type(exc).__name__)
        raise HTTPException(status_code=502, detail="AI 서버 일시 오류") from exc
    finally:
        # Best-effort cleanup: a failing close must not mask the real outcome.
        try:
            await ai_client.close()
        except Exception:
            pass

    # 6. Parse the model output as JSON.
    parsed = parse_json_response(raw or "")
    if not isinstance(parsed, dict):
        logger.warning("document.analyze parse_failed doc_id=%s raw_preview=%s", doc_id, (raw or "")[:200])
        raise HTTPException(status_code=422, detail="분석 결과 파싱 실패")

    # 7. Validate layers and drop filler.
    raw_layers = parsed.get("layers") or []
    if not isinstance(raw_layers, list):
        raise HTTPException(status_code=422, detail="분석 결과 형식 오류")

    layer_titles = {
        "evidence": "근거",
        "explanation": "해설",
        "examples": "사례",
        "summary": "요약",
    }
    valid_layers: list[AnalysisLayer] = []
    seen_layers: set[str] = set()
    for item in raw_layers:
        if not isinstance(item, dict):
            continue

        # The JSON comes from the model, so every field is type-checked before
        # use; a malformed entry is skipped rather than crashing the request.
        layer_type = item.get("layer")
        if not isinstance(layer_type, str) or layer_type not in layer_titles:
            continue
        if layer_type in seen_layers:
            continue

        content = item.get("content")
        if not isinstance(content, str):
            continue
        content = content.strip()
        if len(content) < ANALYZE_LAYER_MIN_CHARS:
            continue
        if _is_skip_content(content):
            continue

        title = item.get("title")
        if not isinstance(title, str) or not title:
            title = layer_titles[layer_type]

        valid_layers.append(
            AnalysisLayer(
                layer=layer_type,  # type: ignore[arg-type]
                title=title,
                content=content,
            )
        )
        seen_layers.add(layer_type)

    # An empty result is covered too: no layers means no summary.
    if "summary" not in seen_layers:
        logger.warning("document.analyze missing_summary doc_id=%s layers=%s", doc_id, seen_layers)
        raise HTTPException(status_code=422, detail="분석 결과에 요약이 없습니다")

    # 8. Build the response and cache it.
    elapsed_ms = (time.perf_counter() - t_start) * 1000
    result = AnalyzeResponse(
        id=doc.id,
        title=doc.title,
        layers=valid_layers,
        elapsed_ms=elapsed_ms,
        truncated=truncated,
        cached=False,
    )
    _analyze_cache_set(cache_key, result)

    logger.info(
        "document.analyze ok doc_id=%s user=%s layers=%d elapsed_ms=%.0f",
        doc_id,
        getattr(user, "username", "?"),
        len(valid_layers),
        elapsed_ms,
    )
    return result
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
You are a document analyzer. Respond ONLY in JSON. No markdown wrapping, no explanation.
|
||||
|
||||
## Task
|
||||
Given a document, produce a structured analysis with up to 4 layers.
|
||||
Skip any layer that does not apply. Always include "summary".
|
||||
|
||||
## Output Schema
|
||||
{
|
||||
"layers": [
|
||||
{"layer": "evidence", "title": "근거", "content": "..."},
|
||||
{"layer": "explanation", "title": "해설", "content": "..."},
|
||||
{"layer": "examples", "title": "사례", "content": "..."},
|
||||
{"layer": "summary", "title": "요약", "content": "..."}
|
||||
]
|
||||
}
|
||||
|
||||
## Rules
|
||||
- Each content: 200~400 characters, in the same language as the document (Korean documents → Korean).
|
||||
- "evidence": Key factual claims or data points stated in the document. Skip for narrative/opinion documents.
|
||||
- "explanation": Why the facts matter, context, or interpretation. Skip for pure data/tables.
|
||||
- "examples": Concrete cases, scenarios, or instances explicitly mentioned. Skip if none exist.
|
||||
- "summary": Always present. 2-3 sentences capturing the document's core message.
|
||||
- Use ONLY information in the document. No outside knowledge.
|
||||
- If a layer does not apply, OMIT it entirely from the layers array. Do NOT write "해당 없음", "정보 없음", "N/A" — just skip.
|
||||
- Maximum 4 layers. Minimum 1 (summary).
|
||||
|
||||
## Document
|
||||
Title: {document_title}
|
||||
Content:
|
||||
{document_text}
|
||||
Reference in New Issue
Block a user