7일 rolling window 뉴스를 country × topic 2-level로 묶은 global digest 를 매일 04:00 KST 배치로 생성.
search 파이프라인 미사용. documents → clustering → cluster-level LLM summarization → digest.
핵심 결정:
- adaptive threshold (0.75/0.78/0.80) + EMA centroid (α=0.7) + time-decay (λ=ln(2)/3)
- min_articles=3, max_topics=10/country, top-5 MMR diversity, ai_summary[:300] truncate
- cluster-level LLM only, drop금지 fallback (topic_label="주요 뉴스 묶음" + top member ai_summary[:200])
- importance_score country별 0~1 normalize + raw_weight_sum 별도 보존, max(score, 0.01) floor
- per-call timeout 25s + pipeline hard cap 600s
- DELETE+INSERT idempotent (UNIQUE digest_date), AIClient._call_chat 직접 호출 (client.py 수정 없음)
신규:
- migrations/101_global_digests.sql (2테이블 정규화)
- app/models/digest.py (GlobalDigest + DigestTopic ORM)
- app/services/digest/{loader,clustering,selection,summarizer,pipeline}.py
- app/workers/digest_worker.py (PIPELINE_HARD_CAP + CLI 진입점)
- app/api/digest.py (/latest, ?date|country, /regenerate, inline Pydantic)
- app/prompts/digest_topic.txt (JSON-only + 절대 금지 블록)
main.py 4줄: import 2 + scheduler add_job 1 + include_router 1.
plan: ~/.claude/plans/quiet-herding-tome.md
165 lines
5.4 KiB
Python
165 lines
5.4 KiB
Python
"""Phase 4 Global Digest API — read-only + 디버그 regenerate.
|
|
|
|
엔드포인트:
- GET /api/digest/latest : 가장 최근 digest
- GET /api/digest?date=YYYY-MM-DD : 특정 날짜 digest (date 미지정 시 최신)
- GET /api/digest?country=KR : 특정 국가만
- POST /api/digest/regenerate : 백그라운드 digest 워커 트리거 (auth 필요)

응답은 country → topic 2-level 구조. topic 이 하나도 없는 country 는 응답에서 자동 생략.
|
|
"""
|
|
|
|
import asyncio
|
|
from datetime import date as date_type
|
|
from datetime import datetime
|
|
from typing import Annotated
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from pydantic import BaseModel
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.orm import selectinload
|
|
|
|
from core.auth import get_current_user
|
|
from core.database import get_session
|
|
from models.digest import DigestTopic, GlobalDigest
|
|
from models.user import User
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
# ─── Pydantic 응답 모델 (schemas/ 디렉토리 미사용 → inline 정의) ───
|
|
|
|
|
|
class TopicResponse(BaseModel):
    """One topic entry inside a country group of the digest response.

    Every field is copied one-to-one from a ``DigestTopic`` ORM row by
    ``_build_response``; no value is recomputed at the API layer.
    """

    # Position of the topic; used as the secondary sort key within a country.
    topic_rank: int
    # Short label naming the topic.
    topic_label: str
    # Summary text for the topic.
    summary: str
    # IDs of the member articles (empty list when the ORM value is None/empty).
    article_ids: list[int]
    # Article count as stored on the row (not derived from ``article_ids`` here).
    article_count: int
    # NOTE(review): presumably the per-country 0..1 normalised score — confirm
    # against the pipeline that writes it.
    importance_score: float
    # NOTE(review): presumably the un-normalised weight sum kept alongside the
    # normalised score — confirm against the pipeline.
    raw_weight_sum: float
    # Flag stored on the row; name suggests the LLM fallback path produced
    # the label/summary — TODO confirm.
    llm_fallback_used: bool
|
|
|
|
|
|
class CountryGroup(BaseModel):
    """All topics of a single country, as nested in ``DigestResponse``."""

    # Country value taken verbatim from the topic rows (e.g. "KR").
    country: str
    # Topics of this country, in the order produced by ``_build_response``.
    topics: list[TopicResponse]
|
|
|
|
|
|
class DigestResponse(BaseModel):
    """Top-level payload returned by the digest read endpoints.

    Digest-level metadata is copied from the ``GlobalDigest`` row; the
    ``countries`` list carries the country → topic nesting.
    """

    # Date the digest belongs to.
    digest_date: date_type
    # Bounds of the article window covered by this digest.
    window_start: datetime
    window_end: datetime
    # NOTE(review): presumably the time-decay lambda used by the pipeline — confirm.
    decay_lambda: float
    # Totals as stored on the digest row. They are NOT recomputed when a
    # country filter narrows ``countries``.
    total_articles: int
    total_countries: int
    total_topics: int
    # Generation time in milliseconds; may be absent.
    generation_ms: int | None
    # LLM call / failure counters as stored on the digest row.
    llm_calls: int
    llm_failures: int
    # Status string as stored on the row (allowed values not visible here).
    status: str
    # Country groups sorted by country; countries without topics never appear.
    countries: list[CountryGroup]
|
|
|
|
|
|
# ─── helpers ───
|
|
|
|
|
|
def _build_response(digest: GlobalDigest, country_filter: str | None = None) -> DigestResponse:
    """Turn a ``GlobalDigest`` ORM object into a ``DigestResponse``.

    Args:
        digest: Digest row whose ``topics`` collection is already loaded.
        country_filter: When truthy, only topics whose ``country`` equals
            this value are kept. ``None`` or an empty string keeps all.

    Returns:
        The response model with topics grouped per country. Groups are
        ordered by country and topics by ``topic_rank``. The digest-level
        totals are copied from the row as-is, even when a filter is applied.
    """
    ordered_rows = sorted(digest.topics, key=lambda row: (row.country, row.topic_rank))

    grouped: dict[str, list[TopicResponse]] = {}
    for row in ordered_rows:
        # A falsy filter disables filtering entirely.
        if country_filter and row.country != country_filter:
            continue

        entry = TopicResponse(
            topic_rank=row.topic_rank,
            topic_label=row.topic_label,
            summary=row.summary,
            # Normalise a None/empty ORM value to a plain list.
            article_ids=list(row.article_ids or []),
            article_count=row.article_count,
            importance_score=row.importance_score,
            raw_weight_sum=row.raw_weight_sum,
            llm_fallback_used=row.llm_fallback_used,
        )
        if row.country not in grouped:
            grouped[row.country] = []
        grouped[row.country].append(entry)

    # Only countries that received at least one topic exist as keys, so
    # empty countries are omitted from the response automatically.
    country_groups = []
    for code in sorted(grouped):
        country_groups.append(CountryGroup(country=code, topics=grouped[code]))

    return DigestResponse(
        digest_date=digest.digest_date,
        window_start=digest.window_start,
        window_end=digest.window_end,
        decay_lambda=digest.decay_lambda,
        total_articles=digest.total_articles,
        total_countries=digest.total_countries,
        total_topics=digest.total_topics,
        generation_ms=digest.generation_ms,
        llm_calls=digest.llm_calls,
        llm_failures=digest.llm_failures,
        status=digest.status,
        countries=country_groups,
    )
|
|
|
|
|
|
async def _load_digest(
    session: AsyncSession,
    target_date: date_type | None,
) -> GlobalDigest | None:
    """Fetch one digest together with its topics.

    Args:
        session: Async SQLAlchemy session used to run the query.
        target_date: Digest date to look up. ``None`` selects the digest
            with the greatest ``digest_date`` instead.

    Returns:
        The matching ``GlobalDigest`` with ``topics`` eagerly loaded, or
        ``None`` when no row matches.
    """
    # selectinload pulls the topics in the same await, so callers can read
    # ``digest.topics`` without triggering lazy loading later.
    stmt = select(GlobalDigest).options(selectinload(GlobalDigest.topics))

    if target_date is None:
        stmt = stmt.order_by(GlobalDigest.digest_date.desc())
    else:
        stmt = stmt.where(GlobalDigest.digest_date == target_date)

    # limit(1) guarantees scalar_one_or_none never sees multiple rows.
    outcome = await session.execute(stmt.limit(1))
    return outcome.scalar_one_or_none()
|
|
|
|
|
|
# ─── Routes ───
|
|
|
|
|
|
@router.get("/latest", response_model=DigestResponse)
async def get_latest(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return the most recent global digest.

    Raises:
        HTTPException: 404 when no digest exists yet.
    """
    # ``user`` is unused in the body; the dependency is declared so the
    # route goes through get_current_user.
    newest = await _load_digest(session, target_date=None)
    if newest is not None:
        return _build_response(newest)
    raise HTTPException(status_code=404, detail="아직 생성된 digest 없음")
|
|
|
|
|
|
@router.get("", response_model=DigestResponse)
async def get_digest(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    date: date_type | None = Query(default=None, description="YYYY-MM-DD (KST)"),
    country: str | None = Query(default=None, description="국가 코드 (예: KR)"),
):
    """Return a digest for a given date, optionally narrowed to one country.

    Args:
        user: Resolved by get_current_user; not used in the body.
        session: Async database session.
        date: Digest date to fetch; the latest digest is used when omitted.
        country: Country code; upper-cased before it is compared with the
            stored topic rows.

    Raises:
        HTTPException: 404 when no digest matches.
    """
    found = await _load_digest(session, target_date=date)

    if found is None:
        # The message names the requested date only when one was supplied.
        if date:
            missing_detail = f"digest 없음 (date={date})"
        else:
            missing_detail = "아직 생성된 digest 없음"
        raise HTTPException(status_code=404, detail=missing_detail)

    # An empty or missing country means "no filter".
    wanted_country = None
    if country:
        wanted_country = country.upper()

    return _build_response(found, country_filter=wanted_country)
|
|
|
|
|
|
# Strong references to in-flight regenerate tasks. The event loop keeps only
# weak references to tasks, so a task nobody else references can be garbage
# collected before it finishes. Each task removes itself when it completes.
_background_tasks: set[asyncio.Task] = set()


@router.post("/regenerate")
async def regenerate(
    user: Annotated[User, Depends(get_current_user)],
):
    """Manually trigger the digest worker as a background task (debug use).

    The request returns immediately; the worker keeps running on the event
    loop after the response is sent.

    Args:
        user: Resolved by get_current_user; not used in the body.

    Returns:
        A small status dict confirming that the worker was scheduled.
    """
    # Imported at call time rather than at module import, as in the original.
    from workers.digest_worker import run

    task = asyncio.create_task(run())
    # Hold the task until it finishes so it cannot disappear mid-execution.
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"status": "started", "message": "global_digest 워커 백그라운드 실행 시작"}
|