Files
hyungi_document_server/app/api/published.py
T
hyungi 85e98db71c feat(publish): P1-1 digest projection — global_digests/digest_topics → render-ready feed
/published/digest 가 read-time projection 반환: version 커서=global_digests.id
(일간 단일라이터 gapless 불요) · pub_id=digest:<date>(date-as-id) · tombstone 없음.
각 digest 에 digest_topics(rank/label/summary/country/article_count/importance) 조인.
엔벨로프 FeedResponse 재사용(뷰어 pull-sync 공용). DIGEST_PUBLISH_ENABLED 점등(host .env).
검증: since=70 → rev71/72 실데이터(49·54 토픽) · since=72 → 빈 배치 next_since 유지(증분 정확).
docsrv-viewer-publish 트랙 (plan viewer-daily-report P1-1).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 21:43:44 +00:00

199 lines
7.9 KiB
Python

"""발행 read API (docsrv-viewer-publish P0-2) — 뷰어가 pull-sync 로 당기는 feed.
published 테이블(발행 워커가 rev 커밋순 gapless 부여)을 rev 커서로 페이지네이션해 반환.
뷰어 = Bearer(settings.viewer_sync_token) 인증, default-deny. read-only(SELECT 만).
GET /published/feed?since={rev}&kind={kind}&limit={n}
rev > since 행을 rev ASC 로 limit 만큼. kind 옵션(study_question|study_explanation|... 후속).
tombstone(deleted=true)도 1급 이벤트로 포함 — 뷰어가 pub_id 로 로컬 삭제(stale 회피).
rev 커서 안전성: 워커가 pg_advisory_xact_lock 단일 라이터로 배치 rev 를 한 트랜잭션에
부여·커밋 → 리더는 rev N 을 N-1 없이 보지 못함(부분가시 0). 뷰어는 next_since 로 반복.
엔벨로프 schema_version = 전송 계약 버전(payload 행별 schema_version 과 별개).
미지원 버전 가시거부는 뷰어 책임(no-silent-fallback) — 여기선 행별 schema_version 그대로 전달.
"""
from __future__ import annotations
import hmac
import logging
from fastapi import APIRouter, Depends, Header, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from core.config import settings
from core.database import async_session
from models.published import Published
logger = logging.getLogger(__name__)
router = APIRouter()
# feed 엔벨로프(전송 계약) 버전 — payload schema_version 과 독립.
FEED_SCHEMA_VERSION = 1
DEFAULT_LIMIT = 200
MAX_LIMIT = 500
def _verify_token(authorization: str | None = Header(default=None)) -> None:
"""뷰어↔DS 발행 채널 Bearer 인증. default-deny(미설정=503). 상수시간 비교(internal_study 정본).
이 토큰은 정답 포함 study payload 를 노출하므로 hmac.compare_digest 로 timing side-channel 차단.
"""
if not settings.viewer_sync_token:
raise HTTPException(status_code=503, detail="viewer_sync_token not configured")
if not authorization or not authorization.lower().startswith("bearer "):
raise HTTPException(status_code=401, detail="missing Bearer token")
token = authorization[7:].strip()
if not hmac.compare_digest(token, settings.viewer_sync_token):
raise HTTPException(status_code=403, detail="invalid token")
async def _session() -> AsyncSession:
async with async_session() as s:
yield s
class FeedItem(BaseModel):
pub_id: str # opaque+stable = 뷰어 dedup키 = progress키
kind: str
source_id: int # DS 내부 소스 행 id (ingest write-back 역해소용, P2)
rev: int
deleted: bool # tombstone — 뷰어 로컬 삭제 트리거
schema_version: int # payload 모양 버전(뷰어 range 수용)
payload: dict # render-ready projection (tombstone 이면 {})
class FeedResponse(BaseModel):
schema_version: int # 엔벨로프(전송 계약) 버전
items: list[FeedItem]
next_since: int # 다음 호출 since (이 배치 max rev; 빈 배치면 입력 since 유지)
has_more: bool # limit 가득 = 더 있을 수 있음(뷰어 반복)
@router.get("/feed", response_model=FeedResponse)
async def published_feed(
since: int = Query(0, ge=0),
kind: str | None = Query(None, max_length=40),
limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
_auth: None = Depends(_verify_token),
session: AsyncSession = Depends(_session),
):
"""rev > since 행을 rev ASC 로 limit 만큼 반환. 뷰어가 next_since 로 incremental pull."""
stmt = select(Published).where(Published.rev > since)
if kind:
stmt = stmt.where(Published.kind == kind)
stmt = stmt.order_by(Published.rev.asc()).limit(limit)
rows = (await session.execute(stmt)).scalars().all()
items = [
FeedItem(
pub_id=r.pub_id,
kind=r.kind,
source_id=r.source_id,
rev=r.rev,
deleted=r.deleted,
schema_version=r.schema_version,
payload=r.payload if r.payload is not None else {},
)
for r in rows
]
next_since = items[-1].rev if items else since
has_more = len(rows) == limit
logger.info(
"published_feed since=%s kind=%s returned=%s next_since=%s has_more=%s",
since, kind, len(items), next_since, has_more,
)
return FeedResponse(
schema_version=FEED_SCHEMA_VERSION,
items=items,
next_since=next_since,
has_more=has_more,
)
# ── P1-1: 뉴스/다이제스트 발행 read API (docsrv-viewer-publish) ────────────────────
# global_digests(일간 컨테이너) + digest_topics(토픽 N, digest_id FK) -> render-ready
# read-time projection. content-type 파라미터화(plan r2): version 커서=global_digests.id
# (일간 단일 라이터라 gapless 불요·gap 무해) · pub_id=date-as-id(admin-gated feed 라 opacity
# 불필요) · tombstone 없음(다이제스트 미삭제). 엔벨로프는 /feed 와 동일(FeedResponse)=뷰어 재사용.
# scaffold-first: DIGEST_PUBLISH_ENABLED off(기본)=503(명시적 미가동, no-silent).
DIGEST_PAYLOAD_SCHEMA_VERSION = 1
@router.get("/digest", response_model=FeedResponse)
async def published_digest(
since: int = Query(0, ge=0),
limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
_auth: None = Depends(_verify_token),
session: AsyncSession = Depends(_session),
):
"""global_digests.id > since 를 id ASC 로 limit 만큼. 각 digest 에 topics 조인해 render-ready 반환."""
if not settings.digest_publish_enabled:
raise HTTPException(status_code=503, detail="digest publish not enabled (scaffold)")
drows = (await session.execute(
text(
"SELECT id, digest_date, status, total_articles, total_topics, total_countries, created_at "
"FROM global_digests WHERE id > :since ORDER BY id ASC LIMIT :limit"
),
{"since": since, "limit": limit},
)).mappings().all()
if not drows:
return FeedResponse(schema_version=FEED_SCHEMA_VERSION, items=[], next_since=since, has_more=False)
ids = [r["id"] for r in drows]
trows = (await session.execute(
text(
"SELECT digest_id, topic_rank, topic_label, summary, country, article_count, importance_score "
"FROM digest_topics WHERE digest_id = ANY(:ids) ORDER BY digest_id ASC, topic_rank ASC"
),
{"ids": ids},
)).mappings().all()
topics_by_digest: dict[int, list[dict]] = {}
for t in trows:
topics_by_digest.setdefault(t["digest_id"], []).append({
"rank": t["topic_rank"],
"label": t["topic_label"],
"summary": t["summary"],
"country": t["country"],
"article_count": t["article_count"],
"importance": t["importance_score"],
})
items = []
for r in drows:
d_date = r["digest_date"].isoformat() if r["digest_date"] else None
items.append(FeedItem(
pub_id=f"digest:{d_date}",
kind="digest",
source_id=r["id"],
rev=r["id"],
deleted=False,
schema_version=DIGEST_PAYLOAD_SCHEMA_VERSION,
payload={
"digest_date": d_date,
"status": r["status"],
"total_articles": r["total_articles"],
"total_topics": r["total_topics"],
"total_countries": r["total_countries"],
"generated_at": r["created_at"].isoformat() if r["created_at"] else None,
"topics": topics_by_digest.get(r["id"], []),
},
))
next_since = items[-1].rev
has_more = len(drows) == limit
logger.info(
"published_digest since=%s returned=%s next_since=%s has_more=%s",
since, len(items), next_since, has_more,
)
return FeedResponse(
schema_version=FEED_SCHEMA_VERSION,
items=items,
next_since=next_since,
has_more=has_more,
)