- summarize_worker: 요약만 생성 (분류 안 함) - queue_consumer: summarize stage 추가 (batch 3) - news_collector: summarize + embed 큐 등록 - process_stage enum에 'summarize' 추가 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
36 lines
1.2 KiB
Python
36 lines
1.2 KiB
Python
"""요약 전용 워커 — 뉴스 등 classify 불필요한 문서의 AI 요약만 생성"""
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from ai.client import AIClient, strip_thinking
|
|
from core.utils import setup_logger
|
|
from models.document import Document
|
|
|
|
# Module-level logger for this worker, created through the project's
# setup_logger helper under the name "summarize_worker".
logger = setup_logger("summarize_worker")
|
|
|
|
|
|
async def process(document_id: int, session: AsyncSession) -> None:
    """Generate an AI summary for one document, without classifying it.

    The document is loaded through ``session.get``. If it already carries
    an ``ai_summary`` the call logs that fact and returns without touching
    the AI client. Otherwise the extracted text, truncated with
    ``[:15000]``, is sent to ``AIClient.summarize`` and the result (passed
    through ``strip_thinking``) is stored on the document together with
    the model version tag and a UTC processing timestamp.

    This function only mutates the loaded ``Document`` instance; it does
    not call commit or flush itself (presumably the caller owns the
    transaction -- confirm against the queue consumer).

    Args:
        document_id: Primary key of the document to summarize.
        session: Async SQLAlchemy session used to load the document.

    Raises:
        ValueError: If no document exists for ``document_id``, or if the
            document's ``extracted_text`` is empty.
    """
    document = await session.get(Document, document_id)
    if not document:
        raise ValueError(f"문서 ID {document_id}를 찾을 수 없음")

    source_text = document.extracted_text
    if not source_text:
        raise ValueError(f"문서 ID {document_id}: extracted_text가 비어있음")

    # An existing summary is kept as-is; nothing is regenerated.
    if document.ai_summary:
        logger.info(f"[요약] document_id={document_id}: 이미 요약 있음, skip")
        return

    ai_client = AIClient()
    try:
        # Only the leading slice of the text is submitted to the model.
        raw_summary = await ai_client.summarize(source_text[:15000])
        cleaned_summary = strip_thinking(raw_summary)

        document.ai_summary = cleaned_summary
        document.ai_model_version = "qwen3.5-35b-a3b"
        document.ai_processed_at = datetime.now(timezone.utc)

        logger.info(f"[요약] document_id={document_id}: {len(document.ai_summary)}자")
    finally:
        # Close the client on both the success and the failure path.
        await ai_client.close()
|