a82b0724df
2026-06-11 맥미니 모델 교체(Gemma4 26B→Qwen3.6-27B-6bit, 콜당 ~90~300s)의 타임아웃 상향 sweep 이 config.yaml/synthesis 만 갱신하고 digest/briefing 코드의 하드코딩 LLM_CALL_TIMEOUT=25(빠른 Gemma 기준)를 누락 → digest 600s 하드캡 초과로 06-10 이후 미생성, briefing 4/4 LLM 폴백(status=failed). (적대 리뷰로 블로커 정정: concurrency=1 사설 세마포로는 digest 44~68 클러스터가 하드캡에 여전히 걸림 + llm_gate 영구 룰 위반.) - 타임아웃·재시도·하드캡을 config.pipeline 단일소스로 이관(digest_llm_timeout_s=300, attempts=2, pipeline_hard_cap_s=3000). 다음 모델 교체 때 재발 차단. - digest/briefing LLM 호출을 사설 Semaphore 제거하고 전역 MLX gate(BACKGROUND) 경유로 변경 — llm_gate 영구 룰(같은 endpoint 단일 게이트, 새 Semaphore 금지) 준수 + ask/eid(FOREGROUND)와 조율. 동시성 lever = 기존 mlx_gate_concurrency 2→4 (continuous batching 실측 — 3동시콜 wall 121s ≈ 단일콜, 직렬 대비 ~3배). - digest/briefing pipeline cluster 루프를 asyncio.gather 동시 실행으로 전환 (실동시성은 게이트가 제한, rank/순서 보존). - deep_summary(70~300s)를 메인 consume_queue 에서 분리해 consume_deep_queue 신설 (markdown/fast split 선례) — 단일 deep 호출이 1분 틱 초과로 메인 큐를 영구 coalesce 시키던 문제 제거. - 죽은 PIPELINE_HARD_CAP=600(briefing/pipeline.py) 제거, summarizer docstring 갱신, deep 컨슈머 disjoint/hold 테스트 추가. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
274 lines
9.8 KiB
Python
274 lines
9.8 KiB
Python
"""야간 수집 뉴스 브리핑 파이프라인 (Plan §"PR-MorningBriefing-1 Backend").
|
|
|
|
흐름: load_night_window → cluster_global → select_for_llm (k=7) →
|
|
(옵션) historical retrieval → compare_cluster_with_fallback → DB save.
|
|
|
|
regenerate 정책: briefing_date UNIQUE 충돌 시 transaction 안에서 DELETE+INSERT.
|
|
"""
|
|
|
|
import asyncio
|
|
import time
|
|
from datetime import date, datetime, timedelta, timezone
|
|
from typing import Any
|
|
from zoneinfo import ZoneInfo
|
|
|
|
from sqlalchemy import delete
|
|
|
|
from ai.client import AIClient
|
|
from core.database import async_session
|
|
from core.utils import setup_logger
|
|
from models.briefing import BriefingTopic, MorningBriefing
|
|
from services.briefing.clustering import LAMBDA, cluster_global
|
|
from services.briefing.comparator import (
|
|
HISTORICAL_WINDOW_DAYS,
|
|
compare_cluster_with_fallback,
|
|
historical_enabled,
|
|
retrieve_historical,
|
|
)
|
|
from services.briefing.loader import load_historical_candidates, load_night_window
|
|
from services.digest.selection import select_for_llm
|
|
|
|
# Module-level logger for the briefing pipeline.
logger = setup_logger("briefing_pipeline")

# Time zone in which the nightly window is defined.
KST = ZoneInfo("Asia/Seoul")

# Length of the nightly window in hours: KST 00:00 ~ 05:00.
NIGHT_WINDOW_HOURS = 5

# Articles selected per cluster for the LLM
# (Plan §"Clustering parameters": briefing K_PER_CLUSTER=7).
SELECT_K = 7

# MMR lambda used for the briefing selection (Plan: 0.6).
SELECT_LAMBDA_MMR = 0.6
|
|
|
|
|
|
def _compute_window(target_date: date | None = None) -> tuple[datetime, datetime, date]:
    """Map a KST calendar date to its nightly window, expressed in UTC.

    The window opens at KST midnight of ``target_date`` and lasts
    ``NIGHT_WINDOW_HOURS`` hours.

    Args:
        target_date: KST date whose midnight opens the window. ``None``
            means today's date in KST.

    Returns:
        ``(window_start_utc, window_end_utc, kst_date)``.
    """
    kst_day = datetime.now(KST).date() if target_date is None else target_date
    opens_at = datetime.combine(kst_day, datetime.min.time(), tzinfo=KST)
    closes_at = opens_at + timedelta(hours=NIGHT_WINDOW_HOURS)
    return (
        opens_at.astimezone(timezone.utc),
        closes_at.astimezone(timezone.utc),
        kst_day,
    )
|
|
|
|
|
|
def _is_usable_topic(envelope: dict, topic_label: str) -> bool:
|
|
"""fallback row 가 아닌 진짜 LLM 결과인지 판정."""
|
|
if envelope.get("llm_fallback_used"):
|
|
return False
|
|
if not envelope.get("country_perspectives"):
|
|
return False
|
|
if topic_label == "주요 뉴스 묶음":
|
|
return False
|
|
return True
|
|
|
|
|
|
def _compute_status(llm_calls: int, fallback_count: int, usable_count: int, has_topics: bool) -> str:
|
|
"""Plan §"Status 4-state 판정표"."""
|
|
if not has_topics or llm_calls == 0:
|
|
return "empty"
|
|
if usable_count == 0:
|
|
return "failed"
|
|
fallback_pct = (fallback_count / llm_calls) if llm_calls else 0.0
|
|
if fallback_pct >= 0.5:
|
|
return "failed"
|
|
if fallback_count > 0 or usable_count < llm_calls:
|
|
return "partial"
|
|
return "success"
|
|
|
|
|
|
def _build_topic_row(
    rank: int,
    cluster: dict,
    envelope: dict,
    historical_docs: list[dict] | None,
    primary_model: str,
) -> BriefingTopic:
    """Assemble the ``BriefingTopic`` row for one cluster and its LLM envelope.

    Args:
        rank: Topic rank, stored as ``topic_rank``.
        cluster: Cluster dict; ``members`` is required, while
            ``country_count``, ``importance_score`` and ``raw_weight_sum``
            default to zero when absent.
        envelope: Comparison result; ``topic_label``, ``headline``,
            ``country_perspectives``, ``divergences``, ``convergences`` and
            ``key_quotes`` are required keys.
        historical_docs: Historical documents retrieved for this cluster;
            ``None`` is treated as an empty list.
        primary_model: Model name recorded as ``llm_model``.

    Returns:
        A new, unsaved ``BriefingTopic``.
    """
    # Historical columns are filled only while the feature is switched on;
    # otherwise both stay None.
    if historical_enabled():
        hist_ids = [doc["id"] for doc in historical_docs or []]
        hist_window = HISTORICAL_WINDOW_DAYS
    else:
        hist_ids = None
        hist_window = None

    columns = {
        "topic_rank": rank,
        "topic_label": envelope["topic_label"],
        "headline": envelope["headline"],
        "country_perspectives": envelope["country_perspectives"],
        "divergences": envelope["divergences"],
        "convergences": envelope["convergences"],
        "key_quotes": envelope["key_quotes"],
        "historical_article_ids": hist_ids,
        "historical_context": envelope.get("historical_context"),
        "historical_window_days": hist_window,
        "cluster_members": [member["id"] for member in cluster["members"]],
        "article_count": len(cluster["members"]),
        "country_count": cluster.get("country_count", 0),
        "importance_score": cluster.get("importance_score", 0.0),
        "raw_weight_sum": cluster.get("raw_weight_sum", 0.0),
        "llm_model": primary_model,
        "llm_fallback_used": envelope.get("llm_fallback_used", False),
    }
    return BriefingTopic(**columns)
|
|
|
|
|
|
async def _save_briefing(
    briefing_date: date,
    window_start: datetime,
    window_end: datetime,
    total_articles: int,
    total_countries: int,
    topic_rows: list[BriefingTopic],
    llm_calls: int,
    llm_failures: int,
    generation_ms: int,
    status: str,
) -> int:
    """Persist one briefing, replacing any existing row for the same date.

    A ``briefing_date`` UNIQUE conflict is avoided by issuing the DELETE and
    the INSERT in the same session before a single commit.

    Args:
        briefing_date: Date the briefing belongs to (the replacement key).
        window_start: Start of the article window.
        window_end: End of the article window.
        total_articles: Number of articles loaded for the window.
        total_countries: Number of distinct countries in the window.
        topic_rows: Topic rows to attach to the briefing; may be empty.
        llm_calls: Number of LLM comparisons attempted.
        llm_failures: Number of comparisons that fell back.
        generation_ms: Pipeline duration in milliseconds.
        status: Run status string to store.

    Returns:
        Primary key of the newly inserted ``MorningBriefing`` row.
    """
    async with async_session() as session:
        # NOTE(review): this is a bulk DELETE on the parent table only;
        # removal of the old topic rows presumably relies on a DB-level
        # ON DELETE CASCADE - confirm against the model/migration.
        await session.execute(
            delete(MorningBriefing).where(MorningBriefing.briefing_date == briefing_date)
        )
        new = MorningBriefing(
            briefing_date=briefing_date,
            window_start=window_start,
            window_end=window_end,
            decay_lambda=LAMBDA,
            total_articles=total_articles,
            total_countries=total_countries,
            total_topics=len(topic_rows),
            generation_ms=generation_ms,
            llm_calls=llm_calls,
            llm_failures=llm_failures,
            status=status,
        )
        new.topics = topic_rows
        session.add(new)

        # Flush so the primary key is assigned, then read it *before* the
        # commit. With expire_on_commit left at its default, attributes are
        # expired by commit() and reading new.id afterwards would need
        # implicit IO, which AsyncSession cannot perform.
        await session.flush()
        briefing_id = new.id

        await session.commit()
        return briefing_id
|
|
|
|
|
|
def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds elapsed since *started* (a ``time.monotonic()`` reading)."""
    return int((time.monotonic() - started) * 1000)


async def run_briefing_pipeline(target_date: date | None = None) -> dict[str, Any]:
    """Run the overnight-news briefing once; called from cron or the manual regenerate API.

    Steps: load the night window, cluster the articles, optionally load a
    historical candidate pool, run one LLM comparison per cluster
    concurrently, then persist the briefing (replacing any existing row for
    the same date).

    Args:
        target_date: KST calendar date to build the briefing for; ``None``
            means today in KST.

    Returns:
        Dict with keys ``briefing_id``, ``status``, ``total_topics``,
        ``total_articles``, ``llm_calls``, ``llm_failures``,
        ``generation_ms`` and ``regenerated`` (always ``True``).
    """
    # Monotonic clock: the run can last many minutes, and a wall-clock
    # adjustment in between must not distort (or negate) the duration.
    started = time.monotonic()
    window_start, window_end, briefing_date = _compute_window(target_date)

    # Snapshot the feature flag once so candidate loading and per-cluster
    # retrieval always agree within a single run.
    use_historical = historical_enabled()

    logger.info(
        f"[briefing] start date={briefing_date} window {window_start} ~ {window_end} "
        f"decay_lambda={LAMBDA:.4f} historical={'on' if use_historical else 'off'}"
    )

    # 1. Load the night window.
    docs = await load_night_window(window_start, window_end)
    total_articles = len(docs)
    total_countries_in_window = len({d["country"] for d in docs})

    # 2. Cluster (topic-first).
    clusters = cluster_global(docs)

    if not clusters:
        # Measure once so the stored and the returned duration are identical.
        generation_ms = _elapsed_ms(started)
        briefing_id = await _save_briefing(
            briefing_date=briefing_date,
            window_start=window_start,
            window_end=window_end,
            total_articles=total_articles,
            total_countries=total_countries_in_window,
            topic_rows=[],
            llm_calls=0,
            llm_failures=0,
            generation_ms=generation_ms,
            status="empty",
        )
        logger.info(f"[briefing] empty (no usable clusters) → briefing_id={briefing_id}")
        return {
            "briefing_id": briefing_id,
            "status": "empty",
            "total_topics": 0,
            "total_articles": total_articles,
            "llm_calls": 0,
            "llm_failures": 0,
            "generation_ms": generation_ms,
            "regenerated": True,
        }

    # 3. (Optional) load the historical candidate pool once for all clusters.
    historical_candidates: list[dict] = []
    if use_historical:
        hist_end = window_start  # up to just before today's window
        hist_start = hist_end - timedelta(days=HISTORICAL_WINDOW_DAYS)
        exclude = {d["id"] for d in docs}
        historical_candidates = await load_historical_candidates(hist_start, hist_end, exclude)

    # 4. One LLM comparison per cluster. Inputs are prepared up front so the
    #    (rank, cluster) pairing survives the concurrent execution.
    jobs = [
        (
            rank,
            cluster,
            select_for_llm(cluster, k=SELECT_K, lambda_mmr=SELECT_LAMBDA_MMR),
            retrieve_historical(cluster, historical_candidates) if use_historical else [],
        )
        for rank, cluster in enumerate(clusters, start=1)
    ]

    client = AIClient()
    try:
        primary_model = client.ai.primary.model

        # 2026-06-15: cluster calls run concurrently via gather. Per the
        # original author's note, effective concurrency is bounded by the
        # global MLX gate (config.mlx_gate_concurrency, BACKGROUND priority).
        # gather() returns results in submission order, so rank is preserved.
        tasks = [
            asyncio.create_task(
                compare_cluster_with_fallback(
                    client, cluster, selected, historical_docs=historical_docs
                )
            )
            for (_, cluster, selected, historical_docs) in jobs
        ]
        try:
            envelopes = await asyncio.gather(*tasks)
        finally:
            # If one call raised, its siblings are still running; cancel and
            # drain them so none of them touches the client after it is
            # closed below. On success every task is done and this is a no-op.
            stragglers = [task for task in tasks if not task.done()]
            for task in stragglers:
                task.cancel()
            if stragglers:
                await asyncio.gather(*stragglers, return_exceptions=True)
    finally:
        await client.close()

    topic_rows: list[BriefingTopic] = []
    llm_calls = 0
    llm_failures = 0
    usable_count = 0
    for (rank, cluster, _selected, historical_docs), envelope in zip(jobs, envelopes):
        llm_calls += 1
        if envelope.get("llm_fallback_used"):
            llm_failures += 1
        if _is_usable_topic(envelope, envelope["topic_label"]):
            usable_count += 1
        # Fallback envelopes are stored too; the status reflects their share.
        topic_rows.append(
            _build_topic_row(rank, cluster, envelope, historical_docs, primary_model)
        )

    generation_ms = _elapsed_ms(started)
    status = _compute_status(llm_calls, llm_failures, usable_count, has_topics=bool(topic_rows))

    briefing_id = await _save_briefing(
        briefing_date=briefing_date,
        window_start=window_start,
        window_end=window_end,
        total_articles=total_articles,
        total_countries=total_countries_in_window,
        topic_rows=topic_rows,
        llm_calls=llm_calls,
        llm_failures=llm_failures,
        generation_ms=generation_ms,
        status=status,
    )

    fallback_pct = (llm_failures / llm_calls * 100.0) if llm_calls else 0.0
    logger.info(
        f"[briefing] done id={briefing_id} status={status} topics={len(topic_rows)} "
        f"usable={usable_count}/{llm_calls} fallback={llm_failures}/{llm_calls} ({fallback_pct:.1f}%) "
        f"elapsed={generation_ms / 1000:.1f}s"
    )

    return {
        "briefing_id": briefing_id,
        "status": status,
        "total_topics": len(topic_rows),
        "total_articles": total_articles,
        "llm_calls": llm_calls,
        "llm_failures": llm_failures,
        "generation_ms": generation_ms,
        "regenerated": True,
    }
|