feat(digest): Phase 4 Global News Digest (cluster-level batch summarization)
7일 rolling window 뉴스를 country × topic 2-level로 묶어 매일 04:00 KST 배치 생성.
search 파이프라인 미사용. documents → clustering → cluster-level LLM summarization → digest.
핵심 결정:
- adaptive threshold (0.75/0.78/0.80) + EMA centroid (α=0.7) + time-decay (λ=ln(2)/3)
- min_articles=3, max_topics=10/country, top-5 MMR diversity, ai_summary[:300] truncate
- cluster-level LLM only, drop금지 fallback (topic_label="주요 뉴스 묶음" + top member ai_summary[:200])
- importance_score country별 0~1 normalize + raw_weight_sum 별도 보존, max(score, 0.01) floor
- per-call timeout 25s + pipeline hard cap 600s
- DELETE+INSERT idempotent (UNIQUE digest_date), AIClient._call_chat 직접 호출 (client.py 수정 없음)
신규:
- migrations/101_global_digests.sql (2테이블 정규화)
- app/models/digest.py (GlobalDigest + DigestTopic ORM)
- app/services/digest/{loader,clustering,selection,summarizer,pipeline}.py
- app/workers/digest_worker.py (PIPELINE_HARD_CAP + CLI 진입점)
- app/api/digest.py (/latest, ?date|country, /regenerate, inline Pydantic)
- app/prompts/digest_topic.txt (JSON-only + 절대 금지 블록)
main.py 4줄: import 2 + scheduler add_job 1 + include_router 1.
plan: ~/.claude/plans/quiet-herding-tome.md
This commit is contained in:
164
app/api/digest.py
Normal file
164
app/api/digest.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""Phase 4 Global Digest API — read-only + 디버그 regenerate.
|
||||
|
||||
엔드포인트:
|
||||
- GET /api/digest/latest : 가장 최근 digest
|
||||
- GET /api/digest?date=YYYY-MM-DD : 특정 날짜 digest
|
||||
- GET /api/digest?country=KR : 특정 국가만
|
||||
- POST /api/digest/regenerate : 백그라운드 digest 워커 트리거 (auth 필요)
|
||||
|
||||
응답은 country → topic 2-level 구조. country 가 비어있는 경우 응답에서 자동 생략.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import date as date_type
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from core.auth import get_current_user
|
||||
from core.database import get_session
|
||||
from models.digest import DigestTopic, GlobalDigest
|
||||
from models.user import User
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ─── Pydantic 응답 모델 (schemas/ 디렉토리 미사용 → inline 정의) ───
|
||||
|
||||
|
||||
class TopicResponse(BaseModel):
|
||||
topic_rank: int
|
||||
topic_label: str
|
||||
summary: str
|
||||
article_ids: list[int]
|
||||
article_count: int
|
||||
importance_score: float
|
||||
raw_weight_sum: float
|
||||
llm_fallback_used: bool
|
||||
|
||||
|
||||
class CountryGroup(BaseModel):
|
||||
country: str
|
||||
topics: list[TopicResponse]
|
||||
|
||||
|
||||
class DigestResponse(BaseModel):
|
||||
digest_date: date_type
|
||||
window_start: datetime
|
||||
window_end: datetime
|
||||
decay_lambda: float
|
||||
total_articles: int
|
||||
total_countries: int
|
||||
total_topics: int
|
||||
generation_ms: int | None
|
||||
llm_calls: int
|
||||
llm_failures: int
|
||||
status: str
|
||||
countries: list[CountryGroup]
|
||||
|
||||
|
||||
# ─── helpers ───
|
||||
|
||||
|
||||
def _build_response(digest: GlobalDigest, country_filter: str | None = None) -> DigestResponse:
|
||||
"""ORM 객체 → DigestResponse. country_filter 가 주어지면 해당 국가만."""
|
||||
topics_by_country: dict[str, list[TopicResponse]] = {}
|
||||
for t in sorted(digest.topics, key=lambda x: (x.country, x.topic_rank)):
|
||||
if country_filter and t.country != country_filter:
|
||||
continue
|
||||
topics_by_country.setdefault(t.country, []).append(
|
||||
TopicResponse(
|
||||
topic_rank=t.topic_rank,
|
||||
topic_label=t.topic_label,
|
||||
summary=t.summary,
|
||||
article_ids=list(t.article_ids or []),
|
||||
article_count=t.article_count,
|
||||
importance_score=t.importance_score,
|
||||
raw_weight_sum=t.raw_weight_sum,
|
||||
llm_fallback_used=t.llm_fallback_used,
|
||||
)
|
||||
)
|
||||
|
||||
countries = [
|
||||
CountryGroup(country=c, topics=topics_by_country[c])
|
||||
for c in sorted(topics_by_country.keys())
|
||||
]
|
||||
|
||||
return DigestResponse(
|
||||
digest_date=digest.digest_date,
|
||||
window_start=digest.window_start,
|
||||
window_end=digest.window_end,
|
||||
decay_lambda=digest.decay_lambda,
|
||||
total_articles=digest.total_articles,
|
||||
total_countries=digest.total_countries,
|
||||
total_topics=digest.total_topics,
|
||||
generation_ms=digest.generation_ms,
|
||||
llm_calls=digest.llm_calls,
|
||||
llm_failures=digest.llm_failures,
|
||||
status=digest.status,
|
||||
countries=countries,
|
||||
)
|
||||
|
||||
|
||||
async def _load_digest(
|
||||
session: AsyncSession,
|
||||
target_date: date_type | None,
|
||||
) -> GlobalDigest | None:
|
||||
"""date 가 주어지면 해당 날짜, 아니면 최신 digest 1건."""
|
||||
query = select(GlobalDigest).options(selectinload(GlobalDigest.topics))
|
||||
if target_date is not None:
|
||||
query = query.where(GlobalDigest.digest_date == target_date)
|
||||
else:
|
||||
query = query.order_by(GlobalDigest.digest_date.desc())
|
||||
query = query.limit(1)
|
||||
result = await session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
|
||||
# ─── Routes ───
|
||||
|
||||
|
||||
@router.get("/latest", response_model=DigestResponse)
|
||||
async def get_latest(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
"""가장 최근 생성된 global digest."""
|
||||
digest = await _load_digest(session, target_date=None)
|
||||
if digest is None:
|
||||
raise HTTPException(status_code=404, detail="아직 생성된 digest 없음")
|
||||
return _build_response(digest)
|
||||
|
||||
|
||||
@router.get("", response_model=DigestResponse)
|
||||
async def get_digest(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
date: date_type | None = Query(default=None, description="YYYY-MM-DD (KST)"),
|
||||
country: str | None = Query(default=None, description="국가 코드 (예: KR)"),
|
||||
):
|
||||
"""특정 날짜 또는 국가 필터링된 digest. date 미지정 시 최신."""
|
||||
digest = await _load_digest(session, target_date=date)
|
||||
if digest is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"digest 없음 (date={date})" if date else "아직 생성된 digest 없음",
|
||||
)
|
||||
country_filter = country.upper() if country else None
|
||||
return _build_response(digest, country_filter=country_filter)
|
||||
|
||||
|
||||
@router.post("/regenerate")
|
||||
async def regenerate(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
):
|
||||
"""디버그용 수동 트리거 — 백그라운드 태스크로 워커 실행 (auth 필요)."""
|
||||
from workers.digest_worker import run
|
||||
|
||||
asyncio.create_task(run())
|
||||
return {"status": "started", "message": "global_digest 워커 백그라운드 실행 시작"}
|
||||
Reference in New Issue
Block a user