# hyungi_document_server/app/workers/summarize_worker.py

"""요약 전용 워커 — 뉴스 등 classify 불필요한 문서의 AI 요약만 생성.

P3 of family-adaptive-bengio (2026-05-23): 50k 초과 input 은 sliding window
(cumulative carry-over) 로 분할 처리. 50k 이하 input 은 기존 동작 유지.
"""

from datetime import datetime, timezone

from sqlalchemy.ext.asyncio import AsyncSession

from ai.client import AIClient, strip_thinking
from core.utils import setup_logger
from models.document import Document

# Module-wide logger for the summarize worker.
logger = setup_logger("summarize_worker")

# Character threshold: text longer than this is summarized slice by slice,
# and each slice is at most this many characters long.
CHUNK_SIZE = 50_000

# Prompt template for every slice after the first. `prior` receives the
# summary accumulated so far and `text` receives the next slice; the model is
# asked to merge both into one summary of the whole document.
SUMMARY_PROMPT_CONTINUATION = (
    "이전 부분 요약:\n{prior}\n\n"
    "다음 부분:\n{text}\n\n"
    "위 두 정보를 합쳐 전체 문서를 500자 이내로 요약해주세요."
)


async def process(document_id: int, session: AsyncSession) -> None:
    """Generate an AI summary for a single document, without classifying it.

    Text of at most ``CHUNK_SIZE`` characters is summarized in one call.
    Longer text is cut into consecutive ``CHUNK_SIZE``-character slices: the
    first slice is summarized directly, and every later slice is sent together
    with the summary accumulated so far, so the final response is meant to
    cover the whole document.

    The summary, ``client.ai.primary.model`` and the current UTC time are
    assigned to the loaded ``Document``; no commit is issued here. A document
    that already has a summary is left untouched.

    Args:
        document_id: Identifier passed to ``session.get`` to load the document.
        session: Async session used to load the document.

    Raises:
        ValueError: If the document is not found or its ``extracted_text`` is
            empty. Errors raised by the AI client are not caught here.
    """
    doc = await session.get(Document, document_id)
    if not doc:
        raise ValueError(f"문서 ID {document_id}를 찾을 수 없음")
    if not doc.extracted_text:
        raise ValueError(f"문서 ID {document_id}: extracted_text가 비어있음")
    if doc.ai_summary:
        # An existing summary is never regenerated.
        logger.info(f"[요약] document_id={document_id}: 이미 요약 있음, skip")
        return

    client = AIClient()
    try:
        source_text = doc.extracted_text
        total_chars = len(source_text)

        if total_chars > CHUNK_SIZE:
            # Long input: walk the text in fixed-size slices, carrying the
            # running summary forward into each subsequent request.
            offsets = range(0, total_chars, CHUNK_SIZE)
            chunk_count = len(offsets)
            logger.info(
                f"[요약] document_id={document_id}: sliding window {chunk_count} chunk "
                f"(total {total_chars}자, chunk_size={CHUNK_SIZE})"
            )
            running = ""
            for ordinal, offset in enumerate(offsets, start=1):
                piece = source_text[offset:offset + CHUNK_SIZE]
                if ordinal == 1:
                    # Nothing to carry over yet: plain summarize call.
                    raw = await client.summarize(piece)
                else:
                    raw = await client.call_primary(
                        SUMMARY_PROMPT_CONTINUATION.format(prior=running, text=piece)
                    )
                # Strip before carrying over so only the cleaned summary is
                # fed into the next prompt.
                running = strip_thinking(raw)
                logger.info(
                    f"[요약] document_id={document_id}: chunk {ordinal}/{chunk_count} done "
                    f"(in={len(piece)}자, carry={len(running)}자)"
                )
            summary = running
        else:
            # Short input: one request covers the whole text.
            summary = await client.summarize(source_text)
            logger.info(
                f"[요약] document_id={document_id}: single chunk ({total_chars}자)"
            )

        doc.ai_summary = strip_thinking(summary)
        doc.ai_model_version = client.ai.primary.model
        doc.ai_processed_at = datetime.now(timezone.utc)
        logger.info(
            f"[요약] document_id={document_id}: {len(doc.ai_summary)}자 final"
        )
    finally:
        # Always release the client, including when a request fails.
        await client.close()