d31ea8ff25
GET /published/processing-status (Bearer, read-only, pull-through) — build_overview 재사용 + source_health⋈news_sources 요약(by_circuit_state·problems). 저장 X(라이브), 소비자 2~3s timeout 책임. P1-4: MAINTENANCE_MODE/NOTE 플래그 동봉 — 소프트락/점검이 워커 멈춰 수치 정체 시 뷰어가 배너로 구분(표면 != 데이터). 검증: 무토큰 401·유효 200 (overview+sources 67 closed+maintenance off). docsrv-viewer-publish (plan P1-2/P1-4). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
255 lines
10 KiB
Python
255 lines
10 KiB
Python
"""발행 read API (docsrv-viewer-publish P0-2) — 뷰어가 pull-sync 로 당기는 feed.
|
|
|
|
published 테이블(발행 워커가 rev 커밋순 gapless 부여)을 rev 커서로 페이지네이션해 반환.
|
|
뷰어 = Bearer(settings.viewer_sync_token) 인증, default-deny. read-only(SELECT 만).
|
|
GET /published/feed?since={rev}&kind={kind}&limit={n}
|
|
rev > since 행을 rev ASC 로 limit 만큼. kind 옵션(study_question|study_explanation|... 후속).
|
|
tombstone(deleted=true)도 1급 이벤트로 포함 — 뷰어가 pub_id 로 로컬 삭제(stale 회피).
|
|
|
|
rev 커서 안전성: 워커가 pg_advisory_xact_lock 단일 라이터로 배치 rev 를 한 트랜잭션에
|
|
부여·커밋 → 리더는 rev N 을 N-1 없이 보지 못함(부분가시 0). 뷰어는 next_since 로 반복.
|
|
|
|
엔벨로프 schema_version = 전송 계약 버전(payload 행별 schema_version 과 별개).
|
|
미지원 버전 가시거부는 뷰어 책임(no-silent-fallback) — 여기선 행별 schema_version 그대로 전달.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hmac
|
|
import logging
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
|
|
from fastapi import APIRouter, Depends, Header, HTTPException, Query
|
|
from pydantic import BaseModel
|
|
from sqlalchemy import select, text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from core.config import settings
|
|
from core.database import async_session
|
|
from models.published import Published
|
|
from models.published import Published
|
|
from services.queue_overview import build_overview
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter()
|
|
|
|
# feed 엔벨로프(전송 계약) 버전 — payload schema_version 과 독립.
|
|
FEED_SCHEMA_VERSION = 1
|
|
DEFAULT_LIMIT = 200
|
|
MAX_LIMIT = 500
|
|
|
|
|
|
def _verify_token(authorization: str | None = Header(default=None)) -> None:
|
|
"""뷰어↔DS 발행 채널 Bearer 인증. default-deny(미설정=503). 상수시간 비교(internal_study 정본).
|
|
|
|
이 토큰은 정답 포함 study payload 를 노출하므로 hmac.compare_digest 로 timing side-channel 차단.
|
|
"""
|
|
if not settings.viewer_sync_token:
|
|
raise HTTPException(status_code=503, detail="viewer_sync_token not configured")
|
|
if not authorization or not authorization.lower().startswith("bearer "):
|
|
raise HTTPException(status_code=401, detail="missing Bearer token")
|
|
token = authorization[7:].strip()
|
|
if not hmac.compare_digest(token, settings.viewer_sync_token):
|
|
raise HTTPException(status_code=403, detail="invalid token")
|
|
|
|
|
|
async def _session() -> AsyncSession:
|
|
async with async_session() as s:
|
|
yield s
|
|
|
|
|
|
class FeedItem(BaseModel):
|
|
pub_id: str # opaque+stable = 뷰어 dedup키 = progress키
|
|
kind: str
|
|
source_id: int # DS 내부 소스 행 id (ingest write-back 역해소용, P2)
|
|
rev: int
|
|
deleted: bool # tombstone — 뷰어 로컬 삭제 트리거
|
|
schema_version: int # payload 모양 버전(뷰어 range 수용)
|
|
payload: dict # render-ready projection (tombstone 이면 {})
|
|
|
|
|
|
class FeedResponse(BaseModel):
|
|
schema_version: int # 엔벨로프(전송 계약) 버전
|
|
items: list[FeedItem]
|
|
next_since: int # 다음 호출 since (이 배치 max rev; 빈 배치면 입력 since 유지)
|
|
has_more: bool # limit 가득 = 더 있을 수 있음(뷰어 반복)
|
|
|
|
|
|
@router.get("/feed", response_model=FeedResponse)
|
|
async def published_feed(
|
|
since: int = Query(0, ge=0),
|
|
kind: str | None = Query(None, max_length=40),
|
|
limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
|
|
_auth: None = Depends(_verify_token),
|
|
session: AsyncSession = Depends(_session),
|
|
):
|
|
"""rev > since 행을 rev ASC 로 limit 만큼 반환. 뷰어가 next_since 로 incremental pull."""
|
|
stmt = select(Published).where(Published.rev > since)
|
|
if kind:
|
|
stmt = stmt.where(Published.kind == kind)
|
|
stmt = stmt.order_by(Published.rev.asc()).limit(limit)
|
|
rows = (await session.execute(stmt)).scalars().all()
|
|
|
|
items = [
|
|
FeedItem(
|
|
pub_id=r.pub_id,
|
|
kind=r.kind,
|
|
source_id=r.source_id,
|
|
rev=r.rev,
|
|
deleted=r.deleted,
|
|
schema_version=r.schema_version,
|
|
payload=r.payload if r.payload is not None else {},
|
|
)
|
|
for r in rows
|
|
]
|
|
next_since = items[-1].rev if items else since
|
|
has_more = len(rows) == limit
|
|
logger.info(
|
|
"published_feed since=%s kind=%s returned=%s next_since=%s has_more=%s",
|
|
since, kind, len(items), next_since, has_more,
|
|
)
|
|
return FeedResponse(
|
|
schema_version=FEED_SCHEMA_VERSION,
|
|
items=items,
|
|
next_since=next_since,
|
|
has_more=has_more,
|
|
)
|
|
|
|
|
|
# ── P1-1: 뉴스/다이제스트 발행 read API (docsrv-viewer-publish) ────────────────────
|
|
# global_digests(일간 컨테이너) + digest_topics(토픽 N, digest_id FK) -> render-ready
|
|
# read-time projection. content-type 파라미터화(plan r2): version 커서=global_digests.id
|
|
# (일간 단일 라이터라 gapless 불요·gap 무해) · pub_id=date-as-id(admin-gated feed 라 opacity
|
|
# 불필요) · tombstone 없음(다이제스트 미삭제). 엔벨로프는 /feed 와 동일(FeedResponse)=뷰어 재사용.
|
|
# scaffold-first: DIGEST_PUBLISH_ENABLED off(기본)=503(명시적 미가동, no-silent).
|
|
DIGEST_PAYLOAD_SCHEMA_VERSION = 1
|
|
|
|
|
|
@router.get("/digest", response_model=FeedResponse)
|
|
async def published_digest(
|
|
since: int = Query(0, ge=0),
|
|
limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
|
|
_auth: None = Depends(_verify_token),
|
|
session: AsyncSession = Depends(_session),
|
|
):
|
|
"""global_digests.id > since 를 id ASC 로 limit 만큼. 각 digest 에 topics 조인해 render-ready 반환."""
|
|
if not settings.digest_publish_enabled:
|
|
raise HTTPException(status_code=503, detail="digest publish not enabled (scaffold)")
|
|
|
|
drows = (await session.execute(
|
|
text(
|
|
"SELECT id, digest_date, status, total_articles, total_topics, total_countries, created_at "
|
|
"FROM global_digests WHERE id > :since ORDER BY id ASC LIMIT :limit"
|
|
),
|
|
{"since": since, "limit": limit},
|
|
)).mappings().all()
|
|
|
|
if not drows:
|
|
return FeedResponse(schema_version=FEED_SCHEMA_VERSION, items=[], next_since=since, has_more=False)
|
|
|
|
ids = [r["id"] for r in drows]
|
|
trows = (await session.execute(
|
|
text(
|
|
"SELECT digest_id, topic_rank, topic_label, summary, country, article_count, importance_score "
|
|
"FROM digest_topics WHERE digest_id = ANY(:ids) ORDER BY digest_id ASC, topic_rank ASC"
|
|
),
|
|
{"ids": ids},
|
|
)).mappings().all()
|
|
|
|
topics_by_digest: dict[int, list[dict]] = {}
|
|
for t in trows:
|
|
topics_by_digest.setdefault(t["digest_id"], []).append({
|
|
"rank": t["topic_rank"],
|
|
"label": t["topic_label"],
|
|
"summary": t["summary"],
|
|
"country": t["country"],
|
|
"article_count": t["article_count"],
|
|
"importance": t["importance_score"],
|
|
})
|
|
|
|
items = []
|
|
for r in drows:
|
|
d_date = r["digest_date"].isoformat() if r["digest_date"] else None
|
|
items.append(FeedItem(
|
|
pub_id=f"digest:{d_date}",
|
|
kind="digest",
|
|
source_id=r["id"],
|
|
rev=r["id"],
|
|
deleted=False,
|
|
schema_version=DIGEST_PAYLOAD_SCHEMA_VERSION,
|
|
payload={
|
|
"digest_date": d_date,
|
|
"status": r["status"],
|
|
"total_articles": r["total_articles"],
|
|
"total_topics": r["total_topics"],
|
|
"total_countries": r["total_countries"],
|
|
"generated_at": r["created_at"].isoformat() if r["created_at"] else None,
|
|
"topics": topics_by_digest.get(r["id"], []),
|
|
},
|
|
))
|
|
next_since = items[-1].rev
|
|
has_more = len(drows) == limit
|
|
logger.info(
|
|
"published_digest since=%s returned=%s next_since=%s has_more=%s",
|
|
since, len(items), next_since, has_more,
|
|
)
|
|
return FeedResponse(
|
|
schema_version=FEED_SCHEMA_VERSION,
|
|
items=items,
|
|
next_since=next_since,
|
|
has_more=has_more,
|
|
)
|
|
|
|
|
|
# ── P1-2: 가공현황 라이브 스냅샷 API (+P1-4 점검 플래그) ──────────────────────────
|
|
# 뷰어 리포트 '문서 가공현황' 섹션용. build_overview(기존 서비스) 재사용 + source_health
|
|
# 조인 요약. pull-through(저장 X) — 라이브 수치라 캐시 없음, 소비자(뷰어)가 2~3s timeout 책임
|
|
# (plan P1-2). P1-4: maintenance 플래그 동봉 — 소프트락/점검이 워커를 멈춰 수치가 정체로
|
|
# 보일 때 뷰어가 '점검·실험 중' 배너로 구분(표면 != 데이터). read-only.
|
|
@router.get("/processing-status")
|
|
async def published_processing_status(
|
|
_auth: None = Depends(_verify_token),
|
|
session: AsyncSession = Depends(_session),
|
|
):
|
|
"""가공현황 스냅샷: queue overview + source_health 요약 + maintenance 플래그."""
|
|
overview = await build_overview(session)
|
|
|
|
sh_rows = (await session.execute(text(
|
|
"SELECT ns.name, ns.category, sh.circuit_state, sh.consecutive_failures, sh.empty_streak, "
|
|
"sh.last_success_at, sh.last_probe_ok "
|
|
"FROM source_health sh JOIN news_sources ns ON ns.id = sh.source_id "
|
|
"ORDER BY (sh.circuit_state <> 'closed') DESC, sh.consecutive_failures DESC"
|
|
))).mappings().all()
|
|
|
|
by_state: dict[str, int] = {}
|
|
problems: list[dict] = []
|
|
for r in sh_rows:
|
|
st = r["circuit_state"]
|
|
by_state[st] = by_state.get(st, 0) + 1
|
|
if st != "closed":
|
|
problems.append({
|
|
"name": r["name"],
|
|
"category": r["category"],
|
|
"circuit_state": st,
|
|
"consecutive_failures": r["consecutive_failures"],
|
|
"empty_streak": r["empty_streak"],
|
|
"last_success_at": r["last_success_at"].isoformat() if r["last_success_at"] else None,
|
|
"last_probe_ok": r["last_probe_ok"],
|
|
})
|
|
|
|
return {
|
|
"schema_version": 1,
|
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
|
"overview": overview,
|
|
"sources": {
|
|
"total": len(sh_rows),
|
|
"by_circuit_state": by_state,
|
|
"problems": problems,
|
|
},
|
|
"maintenance": {
|
|
"active": settings.maintenance_mode,
|
|
"note": settings.maintenance_note,
|
|
},
|
|
}
|