feat(digest): Phase 4 Global News Digest (cluster-level batch summarization)
7일 rolling window 뉴스를 country × topic 2-level로 묶어 매일 04:00 KST 배치 생성.
search 파이프라인 미사용. documents → clustering → cluster-level LLM summarization → digest.
핵심 결정:
- adaptive threshold (0.75/0.78/0.80) + EMA centroid (α=0.7) + time-decay (λ=ln(2)/3)
- min_articles=3, max_topics=10/country, top-5 MMR diversity, ai_summary[:300] truncate
- cluster-level LLM only, drop금지 fallback (topic_label="주요 뉴스 묶음" + top member ai_summary[:200])
- importance_score country별 0~1 normalize + raw_weight_sum 별도 보존, max(score, 0.01) floor
- per-call timeout 25s + pipeline hard cap 600s
- DELETE+INSERT idempotent (UNIQUE digest_date), AIClient._call_chat 직접 호출 (client.py 수정 없음)
신규:
- migrations/101_global_digests.sql (2테이블 정규화)
- app/models/digest.py (GlobalDigest + DigestTopic ORM)
- app/services/digest/{loader,clustering,selection,summarizer,pipeline}.py
- app/workers/digest_worker.py (PIPELINE_HARD_CAP + CLI 진입점)
- app/api/digest.py (/latest, ?date|country, /regenerate, inline Pydantic)
- app/prompts/digest_topic.txt (JSON-only + 절대 금지 블록)
main.py 4줄: import 2 + scheduler add_job 1 + include_router 1.
plan: ~/.claude/plans/quiet-herding-tome.md
This commit is contained in:
87
app/models/digest.py
Normal file
87
app/models/digest.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""global_digests + digest_topics 테이블 ORM (Phase 4)"""
|
||||
|
||||
from datetime import date, datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
BigInteger,
|
||||
Boolean,
|
||||
Date,
|
||||
DateTime,
|
||||
Float,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from core.database import Base
|
||||
|
||||
|
||||
class GlobalDigest(Base):
|
||||
"""하루 단위 digest run 메타데이터"""
|
||||
|
||||
__tablename__ = "global_digests"
|
||||
|
||||
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
|
||||
digest_date: Mapped[date] = mapped_column(Date, nullable=False, unique=True)
|
||||
window_start: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
|
||||
window_end: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
|
||||
decay_lambda: Mapped[float] = mapped_column(Float, nullable=False)
|
||||
|
||||
total_articles: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
total_countries: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
total_topics: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
|
||||
generation_ms: Mapped[int | None] = mapped_column(Integer)
|
||||
llm_calls: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
llm_failures: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
status: Mapped[str] = mapped_column(String(20), nullable=False, default="success")
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), nullable=False, default=datetime.now
|
||||
)
|
||||
|
||||
topics: Mapped[list["DigestTopic"]] = relationship(
|
||||
back_populates="digest",
|
||||
cascade="all, delete-orphan",
|
||||
order_by="DigestTopic.country, DigestTopic.topic_rank",
|
||||
)
|
||||
|
||||
|
||||
class DigestTopic(Base):
|
||||
"""country × topic 단위 cluster 결과"""
|
||||
|
||||
__tablename__ = "digest_topics"
|
||||
|
||||
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
|
||||
digest_id: Mapped[int] = mapped_column(
|
||||
BigInteger,
|
||||
ForeignKey("global_digests.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
country: Mapped[str] = mapped_column(String(10), nullable=False)
|
||||
topic_rank: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
|
||||
topic_label: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
summary: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
|
||||
article_ids: Mapped[list] = mapped_column(JSONB, nullable=False)
|
||||
article_count: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
|
||||
importance_score: Mapped[float] = mapped_column(Float, nullable=False)
|
||||
raw_weight_sum: Mapped[float] = mapped_column(Float, nullable=False)
|
||||
|
||||
centroid_sample: Mapped[dict | None] = mapped_column(JSONB)
|
||||
llm_model: Mapped[str | None] = mapped_column(String(100))
|
||||
llm_fallback_used: Mapped[bool] = mapped_column(
|
||||
Boolean, nullable=False, default=False
|
||||
)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), nullable=False, default=datetime.now
|
||||
)
|
||||
|
||||
digest: Mapped["GlobalDigest"] = relationship(back_populates="topics")
|
||||
Reference in New Issue
Block a user