feat(digest): Phase 4 Global News Digest (cluster-level batch summarization)
7일 rolling window 뉴스를 country × topic 2-level로 묶어 매일 04:00 KST 배치 생성.
search 파이프라인 미사용. documents → clustering → cluster-level LLM summarization → digest.
핵심 결정:
- adaptive threshold (0.75/0.78/0.80) + EMA centroid (α=0.7) + time-decay (λ=ln(2)/3)
- min_articles=3, max_topics=10/country, top-5 MMR diversity, ai_summary[:300] truncate
- cluster-level LLM only, drop금지 fallback (topic_label="주요 뉴스 묶음" + top member ai_summary[:200])
- importance_score country별 0~1 normalize + raw_weight_sum 별도 보존, max(score, 0.01) floor
- per-call timeout 25s + pipeline hard cap 600s
- DELETE+INSERT idempotent (UNIQUE digest_date), AIClient._call_chat 직접 호출 (client.py 수정 없음)
신규:
- migrations/101_global_digests.sql (2테이블 정규화)
- app/models/digest.py (GlobalDigest + DigestTopic ORM)
- app/services/digest/{loader,clustering,selection,summarizer,pipeline}.py
- app/workers/digest_worker.py (PIPELINE_HARD_CAP + CLI 진입점)
- app/api/digest.py (/latest, ?date|country, /regenerate, inline Pydantic)
- app/prompts/digest_topic.txt (JSON-only + 절대 금지 블록)
main.py 4줄: import 2 + scheduler add_job 1 + include_router 1.
plan: ~/.claude/plans/quiet-herding-tome.md
This commit is contained in:
1
app/services/digest/__init__.py
Normal file
1
app/services/digest/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Phase 4 Global Digest 서비스 레이어 — 7일 뉴스 batch clustering + summarization."""
|
||||
118
app/services/digest/clustering.py
Normal file
118
app/services/digest/clustering.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Time-decay weight + adaptive threshold + EMA centroid greedy clustering.
|
||||
|
||||
플랜의 핵심 결정:
|
||||
- λ = ln(2)/3 (3일 반감기)
|
||||
- threshold: 0.75 / 0.78 / 0.80 (밀도 기반 adaptive)
|
||||
- centroid: EMA α=0.7 (단순 평균의 seed bias / drift 방어)
|
||||
- min_articles_per_topic = 3, max_topics_per_country = 10
|
||||
- importance_score: country 내 0~1 normalize + max(score, 0.01) floor
|
||||
- raw_weight_sum 별도 보존 (cross-day 트렌드 분석용)
|
||||
"""
|
||||
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
|
||||
from core.utils import setup_logger
|
||||
|
||||
logger = setup_logger("digest_clustering")
|
||||
|
||||
LAMBDA = math.log(2) / 3 # 3일 반감기 — 사용자 확정값
|
||||
CENTROID_ALPHA = 0.7 # EMA: 기존 중심 70% 유지, 새 멤버 30% 반영
|
||||
MIN_ARTICLES_PER_TOPIC = 3
|
||||
MAX_TOPICS_PER_COUNTRY = 10
|
||||
SCORE_FLOOR = 0.01 # UI 0 표시 문제 사전 차단
|
||||
|
||||
|
||||
def adaptive_threshold(n_docs: int) -> float:
    """Pick the cosine-similarity threshold from document density.

    Dense windows (>200 docs) get a stricter 0.80 cutoff to avoid one giant
    blob cluster; sparse windows (<50 docs) relax to 0.75 so related articles
    still merge; everything in between uses 0.78.
    """
    if n_docs < 50:
        return 0.75
    return 0.80 if n_docs > 200 else 0.78
|
||||
|
||||
|
||||
def _normalize(v: np.ndarray) -> np.ndarray:
|
||||
norm = float(np.linalg.norm(v))
|
||||
if norm == 0.0:
|
||||
return v
|
||||
return v / norm
|
||||
|
||||
|
||||
def _decay_weight(now: datetime, created_at: datetime) -> float:
    """Return exp(-λ · age_in_days); a naive ``created_at`` is assumed UTC.

    Future timestamps are clamped to age 0 so the weight never exceeds 1.
    """
    created = created_at
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)
    age_days = max((now - created).total_seconds() / 86400.0, 0.0)
    return math.exp(-LAMBDA * age_days)
|
||||
|
||||
|
||||
def cluster_country(country: str, docs: list[dict]) -> list[dict]:
    """Greedy-cluster one country's docs, then rank and normalize the clusters.

    Single pass: each doc joins the most similar existing cluster above the
    adaptive threshold, otherwise seeds a new one. Cluster centroids are
    updated by EMA (alpha=CENTROID_ALPHA) to resist drift toward late members.

    Args:
        country: country code (KR, US, ...)
        docs: output of loader.load_news_window (single-country slice).
            NOTE: mutated in place — a "weight" key is injected into every doc
            and the list is re-sorted by weight descending; downstream
            selection/summarizer code reads that injected weight.

    Returns:
        [{centroid, members, weight_sum, raw_weight_sum, importance_score}, ...]
        - members is the list of doc dicts (each carrying its decay weight)
        - sorted by importance_score descending, at most MAX_TOPICS_PER_COUNTRY
    """
    if not docs:
        logger.info(f"[{country}] docs=0 → skip")
        return []

    threshold = adaptive_threshold(len(docs))
    now = datetime.now(timezone.utc)

    # Compute time-decay weights, then iterate heaviest-first so cluster
    # seeds come from the most recent (highest-weight) articles.
    for d in docs:
        d["weight"] = _decay_weight(now, d["created_at"])
    docs.sort(key=lambda d: -d["weight"])

    clusters: list[dict] = []
    for d in docs:
        v = _normalize(d["embedding"])
        best_idx, best_sim = -1, 0.0
        # Find the most similar existing centroid that clears the threshold.
        # Embeddings and centroids are unit-normalized, so dot == cosine sim.
        for i, c in enumerate(clusters):
            sim = float(np.dot(c["centroid"], v))
            if sim > best_sim and sim >= threshold:
                best_sim, best_idx = sim, i
        if best_idx >= 0:
            c = clusters[best_idx]
            # EMA centroid update — keeps 70% of the old centroid, then
            # re-normalize so the dot product stays a cosine similarity.
            c["centroid"] = CENTROID_ALPHA * c["centroid"] + (1.0 - CENTROID_ALPHA) * v
            c["centroid"] = _normalize(c["centroid"])
            c["members"].append(d)
            c["weight_sum"] += d["weight"]
        else:
            clusters.append({
                "centroid": v,
                "members": [d],
                "weight_sum": d["weight"],
            })

    # Drop fragments below the minimum size, then keep only the heaviest
    # MAX_TOPICS_PER_COUNTRY clusters.
    raw_count = len(clusters)
    clusters = [c for c in clusters if len(c["members"]) >= MIN_ARTICLES_PER_TOPIC]
    dropped = raw_count - len(clusters)
    clusters.sort(key=lambda c: -c["weight_sum"])
    clusters = clusters[:MAX_TOPICS_PER_COUNTRY]

    # Normalize importance to 0..1 within the country (top cluster == 1.0),
    # floored at SCORE_FLOOR so the UI never renders a literal 0.
    # raw_weight_sum is preserved separately for cross-day trend analysis.
    if clusters:
        max_w = max(c["weight_sum"] for c in clusters)
        for c in clusters:
            normalized = (c["weight_sum"] / max_w) if max_w > 0 else 0.0
            c["raw_weight_sum"] = c["weight_sum"]
            c["importance_score"] = max(normalized, SCORE_FLOOR)

    logger.info(
        f"[{country}] docs={len(docs)} threshold={threshold} "
        f"raw_clusters={raw_count} dropped={dropped} kept={len(clusters)}"
    )
    return clusters
|
||||
135
app/services/digest/loader.py
Normal file
135
app/services/digest/loader.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""뉴스 7일 window 로드 + country 정규화
|
||||
|
||||
- documents 테이블엔 country 컬럼이 없으므로 document_chunks.country 를 first non-null 로 조인.
|
||||
- chunk-level country 도 NULL 이면 news_sources.name prefix(ai_sub_group) 매칭으로 fallback.
|
||||
- 그래도 NULL 이면 drop(로그 경고).
|
||||
- ai_summary / embedding 이 NULL 이면 처음부터 제외 (재요약/재임베딩 0회 원칙).
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from sqlalchemy import text
|
||||
|
||||
from core.database import async_session
|
||||
from core.utils import setup_logger
|
||||
|
||||
logger = setup_logger("digest_loader")
|
||||
|
||||
|
||||
_NEWS_WINDOW_SQL = text("""
|
||||
SELECT
|
||||
d.id,
|
||||
d.title,
|
||||
d.ai_summary,
|
||||
d.embedding,
|
||||
d.created_at,
|
||||
d.edit_url,
|
||||
d.ai_sub_group,
|
||||
(
|
||||
SELECT c.country
|
||||
FROM document_chunks c
|
||||
WHERE c.doc_id = d.id AND c.country IS NOT NULL
|
||||
LIMIT 1
|
||||
) AS chunk_country
|
||||
FROM documents d
|
||||
WHERE d.source_channel = 'news'
|
||||
AND d.deleted_at IS NULL
|
||||
AND d.created_at >= :window_start
|
||||
AND d.created_at < :window_end
|
||||
AND d.embedding IS NOT NULL
|
||||
AND d.ai_summary IS NOT NULL
|
||||
""")
|
||||
|
||||
|
||||
_SOURCE_COUNTRY_SQL = text("""
|
||||
SELECT name, country FROM news_sources WHERE country IS NOT NULL
|
||||
""")
|
||||
|
||||
|
||||
def _to_numpy_embedding(raw: Any) -> np.ndarray | None:
|
||||
"""pgvector 컬럼을 numpy array(float32)로 정규화."""
|
||||
if raw is None:
|
||||
return None
|
||||
arr = np.asarray(raw, dtype=np.float32)
|
||||
if arr.size == 0:
|
||||
return None
|
||||
return arr
|
||||
|
||||
|
||||
async def _load_source_country_map(session) -> dict[str, str]:
    """Build a news_sources first-name-token → country index.

    ``name`` looks like '경향신문 문화' while documents.ai_sub_group holds only
    the first token ('경향신문', i.e. split[0]), so the index key is the first
    whitespace-separated token. The first occurrence of a token wins.
    """
    rows = await session.execute(_SOURCE_COUNTRY_SQL)
    by_prefix: dict[str, str] = {}
    for name, country in rows:
        if name and country:
            token = name.split(" ")[0].strip()
            if token:
                by_prefix.setdefault(token, country)
    return by_prefix
|
||||
|
||||
|
||||
async def load_news_window(
    window_start: datetime,
    window_end: datetime,
) -> dict[str, list[dict]]:
    """Load news documents inside the window, grouped by country.

    SQL pre-filters to source_channel='news', non-deleted, with non-NULL
    embedding and ai_summary. Docs whose embedding cannot be coerced to a
    numpy array are skipped; docs whose country cannot be resolved (chunk
    country and the news_sources prefix fallback both fail) are dropped and
    counted in a warning.

    Args:
        window_start: inclusive lower bound on documents.created_at.
        window_end: exclusive upper bound.

    Returns:
        {"KR": [doc_dict, ...], "US": [...], ...}
    """
    docs_by_country: dict[str, list[dict]] = defaultdict(list)
    null_country_count = 0
    total = 0

    async with async_session() as session:
        source_country = await _load_source_country_map(session)

        result = await session.execute(
            _NEWS_WINDOW_SQL,
            {"window_start": window_start, "window_end": window_end},
        )
        for row in result.mappings():
            embedding = _to_numpy_embedding(row["embedding"])
            if embedding is None:
                continue

            # Prefer chunk-level country; fall back to the news_sources
            # name-prefix mapping keyed by ai_sub_group.
            country = row["chunk_country"]
            if not country:
                ai_sub_group = (row["ai_sub_group"] or "").strip()
                if ai_sub_group:
                    country = source_country.get(ai_sub_group)
            if not country:
                null_country_count += 1
                continue

            country = country.upper()
            docs_by_country[country].append({
                "id": int(row["id"]),
                "title": row["title"] or "",
                "ai_summary": row["ai_summary"] or "",
                "embedding": embedding,
                "created_at": row["created_at"],
                "edit_url": row["edit_url"] or "",
                "ai_sub_group": row["ai_sub_group"] or "",
            })
            total += 1

    if null_country_count:
        logger.warning(
            f"[loader] country 분류 실패로 drop된 문서 {null_country_count}건 "
            f"(chunk_country + news_sources fallback 모두 실패)"
        )
    logger.info(
        f"[loader] window {window_start.date()} ~ {window_end.date()} → "
        f"{total}건 ({len(docs_by_country)}개 국가)"
    )
    return dict(docs_by_country)
|
||||
177
app/services/digest/pipeline.py
Normal file
177
app/services/digest/pipeline.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Phase 4 digest pipeline orchestration.
|
||||
|
||||
Step:
|
||||
1. AIClient 생성
|
||||
2. 7일 window 로 documents 로드 (loader)
|
||||
3. country 별 cluster_country (clustering)
|
||||
4. cluster 별 select_for_llm (selection)
|
||||
5. cluster 별 summarize_cluster_with_fallback (summarizer, LLM)
|
||||
6. DELETE+INSERT 단일 트랜잭션 (idempotent)
|
||||
7. start/end 로그 + generation_ms + fallback 비율 health metric
|
||||
"""
|
||||
|
||||
import hashlib
import time
from datetime import date, datetime, timedelta, timezone
from zoneinfo import ZoneInfo

from sqlalchemy import delete

from ai.client import AIClient
from core.database import async_session
from core.utils import setup_logger
from models.digest import DigestTopic, GlobalDigest

from .clustering import LAMBDA, cluster_country
from .loader import load_news_window
from .selection import select_for_llm
from .summarizer import summarize_cluster_with_fallback
|
||||
|
||||
logger = setup_logger("digest_pipeline")
|
||||
|
||||
WINDOW_DAYS = 7
|
||||
KST = ZoneInfo("Asia/Seoul")
|
||||
|
||||
|
||||
def _kst_today() -> date:
    """Return today's calendar date in KST — the digest_date partition key.

    Bug fix: the return annotation said ``-> datetime`` but the value is a
    ``datetime.date`` (``.now(KST).date()``), which misled type checkers and
    readers.
    """
    return datetime.now(KST).date()
|
||||
|
||||
|
||||
def _summary_hash(text: str) -> str:
|
||||
return hashlib.sha256((text or "").encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def _build_topic_row(
    country: str,
    rank: int,
    cluster: dict,
    selected: list[dict],
    llm_result: dict,
    primary_model: str,
) -> DigestTopic:
    """Combine the LLM result with cluster metadata into a DigestTopic row.

    article_ids are injected by code directly from cluster["members"] — the
    LLM never produces ids, so it cannot fabricate them.

    Args:
        country: country code for this topic.
        rank: 1-based importance rank within the country.
        cluster: single cluster dict from cluster_country.
        selected: the docs actually sent to the LLM (select_for_llm output).
        llm_result: {topic_label, summary, llm_fallback_used}.
        primary_model: model identifier recorded for provenance.
    """
    article_ids = [int(m["id"]) for m in cluster["members"]]
    # Audit trail: which docs fed the LLM and a fingerprint of each summary,
    # so a regenerated digest can be diffed against its inputs.
    centroid_sample = {
        "selected_doc_ids": [int(m["id"]) for m in selected],
        "summary_hashes": [_summary_hash(m.get("ai_summary") or "") for m in selected],
    }
    return DigestTopic(
        country=country,
        topic_rank=rank,
        topic_label=llm_result["topic_label"],
        summary=llm_result["summary"],
        article_ids=article_ids,
        article_count=len(article_ids),
        importance_score=float(cluster["importance_score"]),
        raw_weight_sum=float(cluster["raw_weight_sum"]),
        centroid_sample=centroid_sample,
        llm_model=primary_model,
        llm_fallback_used=bool(llm_result["llm_fallback_used"]),
    )
|
||||
|
||||
|
||||
async def run_digest_pipeline() -> dict:
    """Run the full digest pipeline. Called from the worker entry point.

    Steps: load 7-day window → cluster per country → select representatives →
    LLM-summarize each cluster (with no-drop fallback) → DELETE+INSERT the
    digest in one transaction (idempotent per digest_date).

    Returns:
        Execution stats dict {llm_calls, fallback_used, total_topics,
        generation_ms} (plus "status" on the non-empty path).
    """
    start = time.time()

    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(days=WINDOW_DAYS)
    digest_date = _kst_today()

    logger.info(
        f"[global_digest] start window={window_start.date()} ~ {window_end.date()} "
        f"digest_date={digest_date} decay_lambda={LAMBDA:.4f}"
    )

    docs_by_country = await load_news_window(window_start, window_end)
    if not docs_by_country:
        # Nothing to digest — return zeroed stats without touching the DB.
        logger.warning("[global_digest] 7일 window에 뉴스 0건 — digest 생성 스킵")
        return {
            "llm_calls": 0,
            "fallback_used": 0,
            "total_topics": 0,
            "generation_ms": int((time.time() - start) * 1000),
        }

    client = AIClient()
    primary_model = client.ai.primary.model

    all_topic_rows: list[DigestTopic] = []
    stats = {"llm_calls": 0, "fallback_used": 0}

    try:
        for country, docs in docs_by_country.items():
            clusters = cluster_country(country, docs)
            if not clusters:
                continue  # sparse countries are excluded automatically

            # rank is 1-based: clusters arrive sorted by importance.
            for rank, cluster in enumerate(clusters, start=1):
                selected = select_for_llm(cluster)
                stats["llm_calls"] += 1
                llm_result = await summarize_cluster_with_fallback(client, cluster, selected)
                if llm_result["llm_fallback_used"]:
                    stats["fallback_used"] += 1
                all_topic_rows.append(
                    _build_topic_row(country, rank, cluster, selected, llm_result, primary_model)
                )
    finally:
        # Always release the AI client, even if a country loop raises.
        await client.close()

    generation_ms = int((time.time() - start) * 1000)
    total_articles = sum(len(d) for d in docs_by_country.values())
    countries_with_topics = len({r.country for r in all_topic_rows})

    # Health status from the fallback ratio: all-LLM = success,
    # >50% fallback = failed, anything in between = partial.
    if stats["fallback_used"] == 0:
        status = "success"
    elif stats["llm_calls"] and stats["fallback_used"] / stats["llm_calls"] > 0.5:
        status = "failed"
    else:
        status = "partial"

    async with async_session() as session:
        # Idempotent write: deleting the same-date digest row removes its
        # topics too via ON DELETE CASCADE, then we insert fresh rows.
        await session.execute(
            delete(GlobalDigest).where(GlobalDigest.digest_date == digest_date)
        )
        new_digest = GlobalDigest(
            digest_date=digest_date,
            window_start=window_start,
            window_end=window_end,
            decay_lambda=LAMBDA,
            total_articles=total_articles,
            total_countries=countries_with_topics,
            total_topics=len(all_topic_rows),
            generation_ms=generation_ms,
            llm_calls=stats["llm_calls"],
            llm_failures=stats["fallback_used"],
            status=status,
        )
        new_digest.topics = all_topic_rows
        session.add(new_digest)
        await session.commit()

    fallback_pct = (
        (stats["fallback_used"] / stats["llm_calls"] * 100.0)
        if stats["llm_calls"] else 0.0
    )
    logger.info(
        f"[global_digest] done countries={countries_with_topics} "
        f"topics={len(all_topic_rows)} llm_calls={stats['llm_calls']} "
        f"fallback={stats['fallback_used']}/{stats['llm_calls']} ({fallback_pct:.2f}%) "
        f"status={status} elapsed={generation_ms / 1000:.1f}s"
    )

    return {
        "llm_calls": stats["llm_calls"],
        "fallback_used": stats["fallback_used"],
        "total_topics": len(all_topic_rows),
        "generation_ms": generation_ms,
        "status": status,
    }
|
||||
62
app/services/digest/selection.py
Normal file
62
app/services/digest/selection.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Cluster 내 LLM 입력 선정 — top-k + MMR diversity + ai_summary truncate.
|
||||
|
||||
순수 top-relevance 는 동일 사건 중복 요약문에 편향되므로 MMR 로 다양성 확보.
|
||||
ai_summary 길이는 LLM 토큰 보호를 위해 SUMMARY_TRUNCATE 로 제한.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
K_PER_CLUSTER = 5
|
||||
LAMBDA_MMR = 0.7 # relevance 70% / diversity 30%
|
||||
SUMMARY_TRUNCATE = 300 # long tail ai_summary 방어
|
||||
|
||||
|
||||
def _normalize(v: np.ndarray) -> np.ndarray:
|
||||
norm = float(np.linalg.norm(v))
|
||||
if norm == 0.0:
|
||||
return v
|
||||
return v / norm
|
||||
|
||||
|
||||
def select_for_llm(cluster: dict, k: int = K_PER_CLUSTER) -> list[dict]:
    """Pick the representative articles of *cluster* for the LLM call.

    Pure top-relevance would bias toward near-duplicate summaries of the
    same event, so after a greedy first pick the rest are chosen by MMR
    (relevance weighted LAMBDA_MMR vs. max similarity to already-picked).

    Args:
        cluster: a single cluster from clustering.cluster_country
        k: number of picks (default 5)

    Returns:
        Selected doc dicts; each gains an ``ai_summary_truncated`` field.
        NOTE: member dicts are mutated in place (``_rel`` and
        ``ai_summary_truncated`` keys are added).
    """
    members = cluster["members"]
    if len(members) <= k:
        # Small cluster: everything goes to the LLM, no MMR needed.
        selected = list(members)
    else:
        centroid = cluster["centroid"]
        # relevance = centroid cosine similarity × time-decay weight
        for m in members:
            v = _normalize(m["embedding"])
            m["_rel"] = float(np.dot(centroid, v)) * m["weight"]

        # Seed with the single most relevant member, then MMR for the rest.
        first = max(members, key=lambda x: x["_rel"])
        selected = [first]
        candidates = [m for m in members if m is not first]

        while len(selected) < k and candidates:
            def mmr_score(c: dict) -> float:
                # Penalize similarity to the closest already-selected doc.
                v = _normalize(c["embedding"])
                max_sim = max(
                    float(np.dot(v, _normalize(s["embedding"])))
                    for s in selected
                )
                return LAMBDA_MMR * c["_rel"] - (1.0 - LAMBDA_MMR) * max_sim

            pick = max(candidates, key=mmr_score)
            selected.append(pick)
            candidates.remove(pick)

    # Cap summary length to protect the LLM input token budget.
    for m in selected:
        m["ai_summary_truncated"] = (m.get("ai_summary") or "")[:SUMMARY_TRUNCATE]

    return selected
|
||||
123
app/services/digest/summarizer.py
Normal file
123
app/services/digest/summarizer.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""Cluster-level LLM 호출 + JSON 파싱 + timeout + drop금지 fallback.
|
||||
|
||||
핵심 결정:
|
||||
- AIClient._call_chat 직접 호출 (client.py 수정 회피, fallback 로직 재사용)
|
||||
- Semaphore(1) 로 MLX 과부하 회피
|
||||
- Per-call timeout 25초 (asyncio.wait_for) — MLX hang/Ollama stall 방어
|
||||
- JSON 파싱 실패 → 1회 재시도 → 그래도 실패 시 minimal fallback (drop 금지)
|
||||
- fallback: topic_label="주요 뉴스 묶음", summary = top member ai_summary[:200]
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ai.client import parse_json_response
|
||||
from core.utils import setup_logger
|
||||
|
||||
logger = setup_logger("digest_summarizer")
|
||||
|
||||
LLM_CALL_TIMEOUT = 25 # 초. MLX 평균 5초 + tail latency 마진
|
||||
FALLBACK_SUMMARY_LIMIT = 200
|
||||
|
||||
_llm_sem = asyncio.Semaphore(1)
|
||||
|
||||
_PROMPT_PATH = Path(__file__).resolve().parent.parent.parent / "prompts" / "digest_topic.txt"
|
||||
_PROMPT_TEMPLATE: str | None = None
|
||||
|
||||
|
||||
def _load_prompt() -> str:
    """Read and memoize the digest_topic.txt template (lazy, loaded once).

    Cached in the module-global _PROMPT_TEMPLATE so the file is read from
    disk at most once per process.
    """
    global _PROMPT_TEMPLATE
    if _PROMPT_TEMPLATE is None:
        _PROMPT_TEMPLATE = _PROMPT_PATH.read_text(encoding="utf-8")
    return _PROMPT_TEMPLATE
|
||||
|
||||
|
||||
def build_prompt(selected: list[dict]) -> str:
    """Inject the selected articles into the digest_topic.txt template.

    Each article contributes one numbered line, preferring
    ai_summary_truncated, then ai_summary, then title (first non-empty).

    Template placeholder: {articles_block}
    """
    template = _load_prompt()
    lines = []
    for i, m in enumerate(selected, start=1):
        text = (m.get("ai_summary_truncated") or m.get("ai_summary") or m.get("title") or "").strip()
        lines.append(f"[{i}] {text}")
    articles_block = "\n".join(lines)
    # str.replace (not str.format) so braces elsewhere in the template are safe.
    return template.replace("{articles_block}", articles_block)
|
||||
|
||||
|
||||
async def _try_call_llm(client: Any, prompt: str) -> str:
    """Single LLM call guarded by the global semaphore and a per-call timeout.

    Semaphore(1) serializes calls (MLX overload guard); asyncio.wait_for
    bounds each call at LLM_CALL_TIMEOUT seconds (hang/stall defense).
    Raises asyncio.TimeoutError on timeout — handled by the caller.

    NOTE(review): reaches into ``client._call_chat`` (private API) by design,
    to reuse AIClient's provider-fallback logic without modifying client.py.
    """
    async with _llm_sem:
        return await asyncio.wait_for(
            client._call_chat(client.ai.primary, prompt),
            timeout=LLM_CALL_TIMEOUT,
        )
|
||||
|
||||
|
||||
def _make_fallback(cluster: dict) -> dict:
    """Build a minimal digest entry from the cluster's best member.

    Used when the LLM fails or returns unparsable JSON — clusters are never
    dropped, so the topic keeps a generic label and the top member's summary
    (truncated). An empty cluster yields an empty summary.
    """
    members = cluster["members"]
    fallback = {
        "topic_label": "주요 뉴스 묶음",
        "summary": "",
        "llm_fallback_used": True,
    }
    if members:
        # Prefer MMR relevance when selection computed it; else decay weight.
        top = max(members, key=lambda m: m.get("_rel", m.get("weight", 0.0)))
        body = (top.get("ai_summary") or top.get("title") or "").strip()
        fallback["summary"] = body[:FALLBACK_SUMMARY_LIMIT]
    return fallback
|
||||
|
||||
|
||||
async def summarize_cluster_with_fallback(
    client: Any,
    cluster: dict,
    selected: list[dict],
) -> dict:
    """Summarize one cluster via LLM with JSON parsing and a no-drop fallback.

    Up to two attempts (one retry). An attempt fails on timeout, any call
    exception, or invalid/empty JSON fields; after both attempts fail the
    cluster still produces a row via _make_fallback (drop is forbidden).

    Args:
        client: AIClient instance (its _call_chat is invoked internally).
        cluster: single cluster dict (members used for logging/fallback).
        selected: select_for_llm output — the articles fed to the prompt.

    Returns:
        {topic_label, summary, llm_fallback_used}
    """
    prompt = build_prompt(selected)

    for attempt in range(2):  # includes one retry
        try:
            raw = await _try_call_llm(client, prompt)
        except asyncio.TimeoutError:
            logger.warning(
                f"LLM 호출 timeout {LLM_CALL_TIMEOUT}s "
                f"(attempt={attempt + 1}, cluster size={len(cluster['members'])})"
            )
            continue
        except Exception as e:
            # Broad catch is deliberate: any provider error just burns this
            # attempt; the fallback below guarantees a result.
            logger.warning(
                f"LLM 호출 실패 attempt={attempt + 1} "
                f"(cluster size={len(cluster['members'])}): {e}"
            )
            continue

        # Accept only a well-formed result: both fields present, string-typed,
        # and non-blank after stripping.
        parsed = parse_json_response(raw)
        if (
            parsed
            and isinstance(parsed.get("topic_label"), str)
            and isinstance(parsed.get("summary"), str)
            and parsed["topic_label"].strip()
            and parsed["summary"].strip()
        ):
            return {
                "topic_label": parsed["topic_label"].strip(),
                "summary": parsed["summary"].strip(),
                "llm_fallback_used": False,
            }
        logger.warning(
            f"JSON 파싱 실패 attempt={attempt + 1} "
            f"(cluster size={len(cluster['members'])}, raw_len={len(raw) if raw else 0})"
        )

    return _make_fallback(cluster)
|
||||
Reference in New Issue
Block a user