fix(migration): fresh DB/DR 부트스트랩 깨짐 3건 수정 (validator 오탐 + multi-statement)

verification env(ephemeral postgres + init_db) 실측으로 fresh DB 부트스트랩이 359~376 replay 중 깨지는 3건 발견·수정: 1. _validate_sql_content 가 인라인 주석(SQL -- ...) 미제거 → 365 의 '-- commit 시 ...' 설명주석을 트랜잭션 제어문 오탐. 줄별 -- 이후 제거. 2. raw '"schema_migrations" in sql.lower()' 체크도 주석 미제외 → 365 의 '-- ... schema_migrations 건드리지 않음' 오탐. _validate_sql_content 로 통합(주석 제외). 3. 마이그 루프가 exec_driver_sql(prepared)이라 multi-statement(365=테이블+시드+인덱스) 불허 → baseline 적재와 동일한 raw asyncpg simple execute 로 통일. (에이전트가 P0로 본 320/326 enum-same-txn 은 오탐 — baseline 0358 이 이미 방어.) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
refactor(workers): 죽은 코드 law_monitor.py 삭제 (367줄)
2026-06-27 07:00:32 +09:00 · 2026-06-27 06:29:02 +09:00 · 2026-06-26 20:07:30 +09:00 · 2026-06-26 19:24:42 +09:00 · 2026-06-26 19:24:42 +09:00 · 2026-06-25 22:54:40 +00:00
80 changed files with 3644 additions and 1143 deletions
@@ -47,3 +47,6 @@ caddy_data/
 *.bak_*
 *.pre-*
 .pre-*/
+
+# SQLite 로컬 아티팩트 (Django/툴링 잔재)
+*.sqlite3
@@ -12,6 +12,13 @@ http://document.hyungi.net {
    # 명시 Content-Type match — 기본 match 의 text/* 는 text/event-stream 까지 포함해
    # SSE(/api/eid/chat)의 첫 ~512B 를 gzip 버퍼링함. SSE 제외, 기존 압축 대상은 보존.
    # (응답 매처는 header <필드> <값> 한 쌍씩 — 여러 줄 = OR. 한 줄 다중 값은 파싱 에러)
+    # 2026-06-20 보안 헤더 (M: 클릭재킹·MIME 스니핑 방어). HSTS 는 TLS 종단 edge(home-caddy) 소관.
+    header {
+        X-Content-Type-Options nosniff
+        X-Frame-Options SAMEORIGIN
+        Referrer-Policy strict-origin-when-cross-origin
+        -Server
+    }
    encode {
        gzip
        match {
@@ -1,5 +1,6 @@
 """AI 추상화 레이어 — 통합 클라이언트. 기본값은 항상 Qwen3.5."""

+import asyncio
 import json
 import re
 from pathlib import Path
@@ -188,6 +189,25 @@ def _load_prompt(name: str) -> str:
 CLASSIFY_PROMPT = _load_prompt("classify.txt") if (PROMPTS_DIR / "classify.txt").exists() else ""


+# 공유 httpx 클라이언트 — 호출마다 AsyncClient 를 새로 만들던 것(30+ 사이트, 연결풀 재사용 0)을
+# 일원화해 keep-alive 재사용. 이벤트루프 바인딩이라 루프 변경(pytest 격리 등) 시 재생성한다.
+# close() 는 공유 풀이라 no-op — 프로세스 종료 시 GC.
+_shared_http: httpx.AsyncClient | None = None
+_shared_http_loop: object | None = None
+
+
+def _get_shared_http() -> httpx.AsyncClient:
+    global _shared_http, _shared_http_loop
+    try:
+        loop: object | None = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+    if _shared_http is None or _shared_http.is_closed or _shared_http_loop is not loop:
+        _shared_http = httpx.AsyncClient(timeout=120)
+        _shared_http_loop = loop
+    return _shared_http
+
+
 class AIClient:
    """AI 모델 통합 클라이언트.

@@ -202,7 +222,7 @@ class AIClient:

    def __init__(self):
        self.ai = settings.ai
-        self._http = httpx.AsyncClient(timeout=120)
+        self._http = _get_shared_http()

    # ─── 3-tier routing (B-0) ───────────────────────────────────────────────

@@ -240,6 +260,23 @@ class AIClient:
        cfg = self.ai.deep or self.ai.primary
        return await self._request(cfg, prompt, system=system)

+    async def call_classifier(self, prompt: str) -> str:
+        """answerability classifier (config ai.classifier, Mac mini 26B MLX).
+
+        private _request 직접 호출(classifier_service)을 봉인하는 public 진입점. gate 는
+        caller(classifier_service)가 acquire_mlx_gate 로 관리 — call_primary 와 동일한
+        caller-managed 계약(여기서 self-gate 하면 caller 와 double-acquire 데드락).
+        """
+        return await self._request(self.ai.classifier, prompt)
+
+    async def call_verifier(self, prompt: str) -> str:
+        """semantic verifier (config ai.verifier, Mac mini 26B MLX).
+
+        private _request 직접 호출(verifier_service)을 봉인. gate 는 caller(verifier_service)
+        가 관리(caller-managed — self-gate 금지).
+        """
+        return await self._request(self.ai.verifier, prompt)
+
    # ─── Legacy API (classify_worker 교체 시 제거 예정) ───────────────────

    async def classify(self, text: str, cfg=None) -> dict:
@@ -346,6 +383,10 @@ class AIClient:
                payload["temperature"] = model_config.temperature
            if model_config.top_p is not None:
                payload["top_p"] = model_config.top_p
+            if model_config.repetition_penalty is not None:
+                payload["repetition_penalty"] = model_config.repetition_penalty
+            if model_config.top_k is not None:
+                payload["top_k"] = model_config.top_k
            response = await self._http.post(
                model_config.endpoint,
                json=payload,
@@ -356,4 +397,5 @@ class AIClient:
            return data["choices"][0]["message"]["content"]

    async def close(self):
-        await self._http.aclose()
+        # 공유 풀(_get_shared_http) 이라 per-use close 안 함 — 연결 재사용. 프로세스 종료 시 GC.
+        return None
@@ -672,6 +672,71 @@ async def list_duplicates(
    )


+class ClauseHit(BaseModel):
+    doc_id: int
+    doc_title: str
+    section_title: str | None = None
+    char_start: int | None = None
+    chunk_id: int
+    node_type: str | None = None
+
+
+class ClauseLookupResponse(BaseModel):
+    label: str
+    hits: list[ClauseHit]
+
+
+# NOTE: '/{doc_id}' (int path param) 라우트보다 먼저 선언해야 '/clause-lookup' 이 doc_id 로
+# 잘못 매칭되지 않는다 (FastAPI 선언 순서 매칭). 이동 금지.
+@router.get("/clause-lookup", response_model=ClauseLookupResponse)
+async def clause_lookup(
+    label: str,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """절 식별자(예: UG-79)로 크로스-doc 절 위치 조회 — 'UG-79 보여줘' 진입점 (U-1).
+
+    절(node_type=clause/clause_split)은 in_corpus=false(검색 비활성)라 의미검색으론 못 찾으므로,
+    라벨 prefix 정확매칭으로 (doc, char_start) 를 직접 해소해 읽기뷰 점프를 가능케 한다.
+    대부분 1건; 부록(A-/E-/F-) 등 doc 간 공유 라벨만 다중 반환(에디션 선택). /sections 와 동일하게
+    document_chunks 직접 조회 — corpus_chunks 우회는 retrieval 아닌 정확지목이므로 의도적 예외.
+    """
+    from sqlalchemy import text as sql_text
+
+    lab = (label or "").strip()
+    if not lab:
+        return ClauseLookupResponse(label=label, hits=[])
+    rows = (
+        await session.execute(
+            sql_text(
+                """
+                SELECT c.doc_id, d.title AS doc_title, c.section_title, c.char_start, c.node_type,
+                       -- 점프 타깃 = outline(/sections: is_leaf 또는 %_split)에 있는 chunk 여야 딥링크 동작.
+                       -- 자신이 그러면 자신, 아니면(컨테이너 절: 자식 heading 보유·is_leaf=false) 문서순서상
+                       -- 자신 이후 첫 딥링크 가능 chunk(=그 절 내용 시작)로 해소. 그래도 없으면 자신(폴백).
+                       COALESCE(
+                         CASE WHEN c.is_leaf = true OR c.node_type LIKE '%\\_split' ESCAPE '\\' THEN c.id END,
+                         (SELECT ch.id FROM document_chunks ch
+                          WHERE ch.doc_id = c.doc_id AND ch.source_type = 'hier_section'
+                            AND ch.chunk_index >= c.chunk_index
+                            AND (ch.is_leaf = true OR ch.node_type LIKE '%\\_split' ESCAPE '\\')
+                          ORDER BY ch.chunk_index LIMIT 1),
+                         c.id
+                       ) AS chunk_id
+                FROM document_chunks c
+                JOIN documents d ON d.id = c.doc_id
+                WHERE c.node_type IN ('clause', 'clause_split')
+                  AND (c.section_title ILIKE :lab_sp OR c.section_title ILIKE :lab_eq)
+                  AND d.deleted_at IS NULL
+                ORDER BY c.doc_id, c.char_start NULLS LAST
+                LIMIT 50
+                """
+            ).bindparams(lab_sp=lab + " %", lab_eq=lab)
+        )
+    ).mappings().all()
+    return ClauseLookupResponse(label=lab, hits=[ClauseHit(**dict(r)) for r in rows])
+
+
@router.get("/{doc_id}", response_model=DocumentDetailResponse)
 async def get_document(
    doc_id: int,
@@ -1166,8 +1231,10 @@ async def upload_document(
            doc.duplicate_of = canonical.id
            canonical.duplicate_count = (canonical.duplicate_count or 0) + 1

-        # document + processing_queue 는 단일 트랜잭션으로 묶어 원자적 정리
-        await enqueue_stage(session, doc.id, "extract")
+        # document + processing_queue 는 단일 트랜잭션으로 묶어 원자적 정리.
+        # G2: 첫 stage=presegment (extract 前 번들 PDF 분할, 후보 A 검증완료 2026-06-18).
+        # 非PDF/단일은 presegment 가 무변 통과 → extract. 번들 PDF 만 N 자식 분할(worker-side gating).
+        await enqueue_stage(session, doc.id, "presegment")
        await session.commit()
    except Exception:
        # DB 예외 시 session 은 get_session 컨텍스트 종료로 자동 rollback.
@@ -1210,6 +1277,14 @@ async def update_document(
        if val is not None and val not in ("business", "knowledge"):
            raise HTTPException(status_code=400, detail="doc_purpose는 business 또는 knowledge만 가능")

+    # edit_url SSRF 가드 (2026-06-20 M1): 내부/메타데이터 주소 후속 fetch 차단 (news.py 동형 검증)
+    if update_data.get("edit_url"):
+        from core.url_validator import validate_feed_url
+        try:
+            await asyncio.to_thread(validate_feed_url, update_data["edit_url"])
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"edit_url 검증 실패: {e}")
+
    for field, value in update_data.items():
        setattr(doc, field, value)
    doc.updated_at = datetime.now(timezone.utc)
@@ -1490,7 +1565,7 @@ ANALYZE_PROMPT = (
 )

 ANALYZE_TEXT_LIMIT = 12000  # chars (15000 → 12000, 실측 timeout 빈발)
-ANALYZE_TIMEOUT_S = 60  # 15,000자 입력 + 4층 출력. 실측 7~45초, safety margin 포함
+ANALYZE_TIMEOUT_S = settings.llm_call_timeout_s  # 2026-06-20 config 단일소스 (구 60s=빠른 Gemma)
 ANALYZE_CACHE_TTL_S = 1800  # 30분
 ANALYZE_CACHE_MAXSIZE = 100
 ANALYZE_LAYER_MIN_CHARS = 50  # 이 미만이면 억지 채움으로 보고 제거
@@ -0,0 +1,230 @@
+"""뷰어 write-back ingest (study-to-viewer P2) — 뷰어 로컬 풀이 세션을 DS 로 흘려 finalize 재생.
+
+흐름(plan study-to-viewer-slice1 P2, r2/r3 불변식):
+  뷰어 outbox → POST /ingest/study/attempts (Bearer VIEWER_SYNC_TOKEN, study_ingest_enabled gate)
+  → pub_id→published.source_id→StudyQuestion 해소(부재 graceful skip) → principal=question.user_id
+  → topic 별 그룹(뷰어 subject 퀴즈가 여러 DS topic 걸칠 수 있음) → topic 마다 DS quiz_session
+     (source='viewer', client_session_uuid) 생성 + attempt(derive_outcome=채점 단일 소스) + 세션 done
+  → finalize_session **무수정 재생**(SR/pattern/progress + 4-A/4-B enqueue) → finalized_at 마커
+  → 전부 1 트랜잭션(원자) 후 commit.
+
+멱등(r2 P2-2): client_session_uuid 로 기존 세션 있으면 이미 적재된 것 → 캐시 요약 반환(재실행 0).
+  원자 1-tx 라 'uuid 존재 ⟺ finalize 완료' → at-least-once outbox 재전송에도 SR 이중 advance 없음.
+user_id 리터럴 금지(r2): principal = 해소된 질문의 owner(단일, mixed 면 거부).
+"""
+
+from __future__ import annotations
+
+import hmac
+import logging
+from collections import defaultdict
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, Depends, Header, HTTPException
+from pydantic import BaseModel
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.config import settings
+from core.database import async_session
+from models.published import Published
+from models.study_question import StudyQuestion, StudyQuestionAttempt
+from models.study_quiz_session import StudyQuizSession
+from services.study.outcome import derive_outcome
+from services.study.publish_projection import KIND_QUESTION
+from services.study.session_finalize import finalize_session
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+
+def _verify_token(authorization: str | None = Header(default=None)) -> None:
+    """뷰어↔DS 발행 채널 Bearer(read 와 동일 토큰, r3 단일토큰 수용). default-deny(미설정=503)."""
+    if not settings.viewer_sync_token:
+        raise HTTPException(status_code=503, detail="viewer_sync_token not configured")
+    if not authorization or not authorization.lower().startswith("bearer "):
+        raise HTTPException(status_code=401, detail="missing Bearer token")
+    token = authorization[7:].strip()
+    if not hmac.compare_digest(token, settings.viewer_sync_token):
+        raise HTTPException(status_code=403, detail="invalid token")
+
+
+async def _session() -> AsyncSession:
+    async with async_session() as s:
+        yield s
+
+
+class IngestAttempt(BaseModel):
+    question_pub_id: str
+    selected_choice: int | None = None
+    is_unsure: bool = False
+    answered_at: str | None = None  # 클라(오프라인) ISO 시각 — 미래 스큐 클램프, id 가 타이브레이커
+
+
+class IngestBody(BaseModel):
+    client_session_uuid: str
+    attempts: list[IngestAttempt]
+
+
+def _parse_answered_at(s: str | None, now: datetime) -> datetime:
+    if not s:
+        return now
+    try:
+        dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return min(dt, now)  # 미래 스큐는 now 로 클램프(클라 시계 오염 방지)
+    except Exception:
+        return now
+
+
+@router.post("/attempts")
+async def ingest_attempts(
+    body: IngestBody,
+    _auth: None = Depends(_verify_token),
+    session: AsyncSession = Depends(_session),
+):
+    if not settings.study_ingest_enabled:
+        raise HTTPException(status_code=503, detail="study_ingest not enabled")
+    if not body.client_session_uuid or not body.attempts:
+        raise HTTPException(status_code=400, detail="client_session_uuid 와 attempts 필요")
+
+    # 멱등: 이 uuid 로 이미 적재됐나(원자 1-tx 라 존재=완료). 있으면 캐시 요약 반환(재실행 0).
+    existing = (
+        await session.execute(
+            select(StudyQuizSession).where(
+                StudyQuizSession.client_session_uuid == body.client_session_uuid
+            )
+        )
+    ).scalars().all()
+    if existing:
+        return {
+            "status": "already_ingested",
+            "sessions": [
+                {
+                    "topic_id": s.study_topic_id,
+                    "correct": s.correct_count,
+                    "wrong": s.wrong_count,
+                    "unsure": s.unsure_count,
+                }
+                for s in existing
+            ],
+        }
+
+    # pub_id → source_id(내부 질문 id) 해소. deleted tombstone 제외.
+    pub_ids = list({a.question_pub_id for a in body.attempts})
+    pub_rows = (
+        await session.execute(
+            select(Published.pub_id, Published.source_id).where(
+                Published.kind == KIND_QUESTION,
+                Published.pub_id.in_(pub_ids),
+                Published.deleted.is_(False),
+            )
+        )
+    ).all()
+    src_by_pubid = {r.pub_id: r.source_id for r in pub_rows}
+
+    # 질문 fetch(미삭제). principal = owner(단일).
+    source_ids = list(set(src_by_pubid.values()))
+    q_rows = (
+        await session.execute(
+            select(StudyQuestion).where(
+                StudyQuestion.id.in_(source_ids), StudyQuestion.deleted_at.is_(None)
+            )
+        )
+    ).scalars().all()
+    q_by_id = {q.id: q for q in q_rows}
+    owners = {q.user_id for q in q_by_id.values()}
+    if len(owners) > 1:
+        raise HTTPException(status_code=400, detail="여러 사용자 소유 질문 혼재 — 단일 principal 위반")
+    if not owners:
+        raise HTTPException(status_code=404, detail="해소 가능한 질문 없음")
+    user_id = owners.pop()
+
+    now = datetime.now(timezone.utc)
+
+    # topic 별 그룹(해소 실패 attempt 는 graceful skip). 같은 (uuid, topic) 1 세션.
+    by_topic: dict[int, list[tuple[IngestAttempt, StudyQuestion]]] = defaultdict(list)
+    skipped: list[str] = []
+    for a in body.attempts:
+        src = src_by_pubid.get(a.question_pub_id)
+        q = q_by_id.get(src) if src is not None else None
+        if q is None:
+            skipped.append(a.question_pub_id)
+            continue
+        by_topic[q.study_topic_id].append((a, q))
+    if not by_topic:
+        raise HTTPException(status_code=404, detail="해소된 attempt 없음")
+
+    summaries = []
+    for topic_id, items in by_topic.items():
+        qids = [q.id for (_, q) in items]
+        qs = StudyQuizSession(
+            user_id=user_id,
+            study_topic_id=topic_id,
+            question_ids=qids,
+            subject_distribution={},
+            status="done",
+            cursor=len(qids),
+            source="viewer",
+            client_session_uuid=body.client_session_uuid,
+            finished_at=now,
+            created_at=now,
+            updated_at=now,
+        )
+        session.add(qs)
+        await session.flush()  # qs.id
+
+        c = w = u = 0
+        for a, q in items:
+            try:
+                sel, is_corr, outcome = derive_outcome(a.selected_choice, a.is_unsure, q.correct_choice)
+            except ValueError:
+                skipped.append(a.question_pub_id)  # 선택 없고 unsure 아님 = 무효 → skip
+                continue
+            if outcome == "correct":
+                c += 1
+            elif outcome == "wrong":
+                w += 1
+            elif outcome == "unsure":
+                u += 1
+            session.add(
+                StudyQuestionAttempt(
+                    user_id=user_id,
+                    study_question_id=q.id,
+                    study_topic_id=topic_id,
+                    selected_choice=sel,
+                    correct_choice=q.correct_choice,
+                    is_correct=is_corr,
+                    outcome=outcome,
+                    quiz_session_id=qs.id,
+                    answered_at=_parse_answered_at(a.answered_at, now),
+                )
+            )
+        qs.correct_count, qs.wrong_count, qs.unsure_count = c, w, u
+        await session.flush()
+
+        # finalize 무수정 재생(progress/SR/pattern + 4-A/4-B enqueue). 그 후 멱등 마커.
+        summary = await finalize_session(
+            session, user_id=user_id, study_topic_id=topic_id, quiz_session_id=qs.id
+        )
+        qs.finalized_at = now
+        summaries.append(
+            {
+                "topic_id": topic_id,
+                "quiz_session_id": qs.id,
+                "correct": summary.correct,
+                "wrong": summary.wrong,
+                "unsure": summary.unsure,
+                "newly_correct": summary.newly_correct,
+                "relapsed": summary.relapsed,
+                "recovered": summary.recovered,
+            }
+        )
+
+    await session.commit()
+    logger.info(
+        "study_ingest uuid=%s user=%s sessions=%s skipped=%s",
+        body.client_session_uuid, user_id, len(summaries), len(skipped),
+    )
+    return {"status": "ingested", "skipped": skipped, "sessions": summaries}
@@ -9,7 +9,7 @@ from sqlalchemy import func, select
 from sqlalchemy import text as sql_text
 from sqlalchemy.ext.asyncio import AsyncSession

-from core.auth import get_current_user
+from core.auth import get_current_user, require_admin
 from core.database import get_session
 from core.library import LIBRARY_PREFIX, MAX_DEPTH, normalize_library_path
 from models.category import LibraryCategory
@@ -78,7 +78,7 @@ async def list_categories(
@router.post("/categories", response_model=CategoryResponse, status_code=201)
 async def create_category(
    body: CategoryCreate,
-    user: Annotated[User, Depends(get_current_user)],
+    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
 ):
    """카테고리 생성 (조상 자동 생성 포함)"""
@@ -133,7 +133,7 @@ async def create_category(
@router.patch("/categories", response_model=CategoryResponse)
 async def rename_category(
    body: CategoryRename,
-    user: Annotated[User, Depends(get_current_user)],
+    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
 ):
    """카테고리 이름 변경 (leaf only, path 기반 식별)"""
@@ -214,7 +214,7 @@ async def rename_category(
@router.delete("/categories", status_code=204)
 async def delete_category(
    path: str = Query(..., description="삭제할 카테고리 경로"),
-    user: Annotated[User, Depends(get_current_user)] = None,
+    user: Annotated[User, Depends(require_admin)] = None,
    session: Annotated[AsyncSession, Depends(get_session)] = None,
 ):
    """카테고리 삭제 (leaf only, 문서 없는 경우만)"""
@@ -410,7 +410,7 @@ async def get_facet_values(
@router.post("/facets", response_model=FacetValueResponse, status_code=201)
 async def add_facet_value(
    body: FacetValueResponse,
-    user: Annotated[User, Depends(get_current_user)],
+    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
 ):
    """facet 사전에 새 값 추가"""
@@ -0,0 +1,254 @@
+"""발행 read API (docsrv-viewer-publish P0-2) — 뷰어가 pull-sync 로 당기는 feed.
+
+published 테이블(발행 워커가 rev 커밋순 gapless 부여)을 rev 커서로 페이지네이션해 반환.
+뷰어 = Bearer(settings.viewer_sync_token) 인증, default-deny. read-only(SELECT 만).
+  GET /published/feed?since={rev}&kind={kind}&limit={n}
+    rev > since 행을 rev ASC 로 limit 만큼. kind 옵션(study_question|study_explanation|... 후속).
+    tombstone(deleted=true)도 1급 이벤트로 포함 — 뷰어가 pub_id 로 로컬 삭제(stale 회피).
+
+rev 커서 안전성: 워커가 pg_advisory_xact_lock 단일 라이터로 배치 rev 를 한 트랜잭션에
+부여·커밋 → 리더는 rev N 을 N-1 없이 보지 못함(부분가시 0). 뷰어는 next_since 로 반복.
+
+엔벨로프 schema_version = 전송 계약 버전(payload 행별 schema_version 과 별개).
+미지원 버전 가시거부는 뷰어 책임(no-silent-fallback) — 여기선 행별 schema_version 그대로 전달.
+"""
+from __future__ import annotations
+
+import hmac
+import logging
+import logging
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import select, text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.config import settings
+from core.database import async_session
+from models.published import Published
+from models.published import Published
+from services.queue_overview import build_overview
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+# feed 엔벨로프(전송 계약) 버전 — payload schema_version 과 독립.
+FEED_SCHEMA_VERSION = 1
+DEFAULT_LIMIT = 200
+MAX_LIMIT = 500
+
+
+def _verify_token(authorization: str | None = Header(default=None)) -> None:
+    """뷰어↔DS 발행 채널 Bearer 인증. default-deny(미설정=503). 상수시간 비교(internal_study 정본).
+
+    이 토큰은 정답 포함 study payload 를 노출하므로 hmac.compare_digest 로 timing side-channel 차단.
+    """
+    if not settings.viewer_sync_token:
+        raise HTTPException(status_code=503, detail="viewer_sync_token not configured")
+    if not authorization or not authorization.lower().startswith("bearer "):
+        raise HTTPException(status_code=401, detail="missing Bearer token")
+    token = authorization[7:].strip()
+    if not hmac.compare_digest(token, settings.viewer_sync_token):
+        raise HTTPException(status_code=403, detail="invalid token")
+
+
+async def _session() -> AsyncSession:
+    async with async_session() as s:
+        yield s
+
+
+class FeedItem(BaseModel):
+    pub_id: str          # opaque+stable = 뷰어 dedup키 = progress키
+    kind: str
+    source_id: int       # DS 내부 소스 행 id (ingest write-back 역해소용, P2)
+    rev: int
+    deleted: bool        # tombstone — 뷰어 로컬 삭제 트리거
+    schema_version: int  # payload 모양 버전(뷰어 range 수용)
+    payload: dict        # render-ready projection (tombstone 이면 {})
+
+
+class FeedResponse(BaseModel):
+    schema_version: int       # 엔벨로프(전송 계약) 버전
+    items: list[FeedItem]
+    next_since: int           # 다음 호출 since (이 배치 max rev; 빈 배치면 입력 since 유지)
+    has_more: bool            # limit 가득 = 더 있을 수 있음(뷰어 반복)
+
+
+@router.get("/feed", response_model=FeedResponse)
+async def published_feed(
+    since: int = Query(0, ge=0),
+    kind: str | None = Query(None, max_length=40),
+    limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
+    _auth: None = Depends(_verify_token),
+    session: AsyncSession = Depends(_session),
+):
+    """rev > since 행을 rev ASC 로 limit 만큼 반환. 뷰어가 next_since 로 incremental pull."""
+    stmt = select(Published).where(Published.rev > since)
+    if kind:
+        stmt = stmt.where(Published.kind == kind)
+    stmt = stmt.order_by(Published.rev.asc()).limit(limit)
+    rows = (await session.execute(stmt)).scalars().all()
+
+    items = [
+        FeedItem(
+            pub_id=r.pub_id,
+            kind=r.kind,
+            source_id=r.source_id,
+            rev=r.rev,
+            deleted=r.deleted,
+            schema_version=r.schema_version,
+            payload=r.payload if r.payload is not None else {},
+        )
+        for r in rows
+    ]
+    next_since = items[-1].rev if items else since
+    has_more = len(rows) == limit
+    logger.info(
+        "published_feed since=%s kind=%s returned=%s next_since=%s has_more=%s",
+        since, kind, len(items), next_since, has_more,
+    )
+    return FeedResponse(
+        schema_version=FEED_SCHEMA_VERSION,
+        items=items,
+        next_since=next_since,
+        has_more=has_more,
+    )
+
+
+# ── P1-1: 뉴스/다이제스트 발행 read API (docsrv-viewer-publish) ────────────────────
+# global_digests(일간 컨테이너) + digest_topics(토픽 N, digest_id FK) -> render-ready
+# read-time projection. content-type 파라미터화(plan r2): version 커서=global_digests.id
+# (일간 단일 라이터라 gapless 불요·gap 무해) · pub_id=date-as-id(admin-gated feed 라 opacity
+# 불필요) · tombstone 없음(다이제스트 미삭제). 엔벨로프는 /feed 와 동일(FeedResponse)=뷰어 재사용.
+# scaffold-first: DIGEST_PUBLISH_ENABLED off(기본)=503(명시적 미가동, no-silent).
+DIGEST_PAYLOAD_SCHEMA_VERSION = 1
+
+
+@router.get("/digest", response_model=FeedResponse)
+async def published_digest(
+    since: int = Query(0, ge=0),
+    limit: int = Query(DEFAULT_LIMIT, ge=1, le=MAX_LIMIT),
+    _auth: None = Depends(_verify_token),
+    session: AsyncSession = Depends(_session),
+):
+    """global_digests.id > since 를 id ASC 로 limit 만큼. 각 digest 에 topics 조인해 render-ready 반환."""
+    if not settings.digest_publish_enabled:
+        raise HTTPException(status_code=503, detail="digest publish not enabled (scaffold)")
+
+    drows = (await session.execute(
+        text(
+            "SELECT id, digest_date, status, total_articles, total_topics, total_countries, created_at "
+            "FROM global_digests WHERE id > :since ORDER BY id ASC LIMIT :limit"
+        ),
+        {"since": since, "limit": limit},
+    )).mappings().all()
+
+    if not drows:
+        return FeedResponse(schema_version=FEED_SCHEMA_VERSION, items=[], next_since=since, has_more=False)
+
+    ids = [r["id"] for r in drows]
+    trows = (await session.execute(
+        text(
+            "SELECT digest_id, topic_rank, topic_label, summary, country, article_count, importance_score "
+            "FROM digest_topics WHERE digest_id = ANY(:ids) ORDER BY digest_id ASC, topic_rank ASC"
+        ),
+        {"ids": ids},
+    )).mappings().all()
+
+    topics_by_digest: dict[int, list[dict]] = {}
+    for t in trows:
+        topics_by_digest.setdefault(t["digest_id"], []).append({
+            "rank": t["topic_rank"],
+            "label": t["topic_label"],
+            "summary": t["summary"],
+            "country": t["country"],
+            "article_count": t["article_count"],
+            "importance": t["importance_score"],
+        })
+
+    items = []
+    for r in drows:
+        d_date = r["digest_date"].isoformat() if r["digest_date"] else None
+        items.append(FeedItem(
+            pub_id=f"digest:{d_date}",
+            kind="digest",
+            source_id=r["id"],
+            rev=r["id"],
+            deleted=False,
+            schema_version=DIGEST_PAYLOAD_SCHEMA_VERSION,
+            payload={
+                "digest_date": d_date,
+                "status": r["status"],
+                "total_articles": r["total_articles"],
+                "total_topics": r["total_topics"],
+                "total_countries": r["total_countries"],
+                "generated_at": r["created_at"].isoformat() if r["created_at"] else None,
+                "topics": topics_by_digest.get(r["id"], []),
+            },
+        ))
+    next_since = items[-1].rev
+    has_more = len(drows) == limit
+    logger.info(
+        "published_digest since=%s returned=%s next_since=%s has_more=%s",
+        since, len(items), next_since, has_more,
+    )
+    return FeedResponse(
+        schema_version=FEED_SCHEMA_VERSION,
+        items=items,
+        next_since=next_since,
+        has_more=has_more,
+    )
+
+
+# ── P1-2: 가공현황 라이브 스냅샷 API (+P1-4 점검 플래그) ──────────────────────────
+# 뷰어 리포트 '문서 가공현황' 섹션용. build_overview(기존 서비스) 재사용 + source_health
+# 조인 요약. pull-through(저장 X) — 라이브 수치라 캐시 없음, 소비자(뷰어)가 2~3s timeout 책임
+# (plan P1-2). P1-4: maintenance 플래그 동봉 — 소프트락/점검이 워커를 멈춰 수치가 정체로
+# 보일 때 뷰어가 '점검·실험 중' 배너로 구분(표면 != 데이터). read-only.
+@router.get("/processing-status")
+async def published_processing_status(
+    _auth: None = Depends(_verify_token),
+    session: AsyncSession = Depends(_session),
+):
+    """가공현황 스냅샷: queue overview + source_health 요약 + maintenance 플래그."""
+    overview = await build_overview(session)
+
+    sh_rows = (await session.execute(text(
+        "SELECT ns.name, ns.category, sh.circuit_state, sh.consecutive_failures, sh.empty_streak, "
+        "sh.last_success_at, sh.last_probe_ok "
+        "FROM source_health sh JOIN news_sources ns ON ns.id = sh.source_id "
+        "ORDER BY (sh.circuit_state <> 'closed') DESC, sh.consecutive_failures DESC"
+    ))).mappings().all()
+
+    by_state: dict[str, int] = {}
+    problems: list[dict] = []
+    for r in sh_rows:
+        st = r["circuit_state"]
+        by_state[st] = by_state.get(st, 0) + 1
+        if st != "closed":
+            problems.append({
+                "name": r["name"],
+                "category": r["category"],
+                "circuit_state": st,
+                "consecutive_failures": r["consecutive_failures"],
+                "empty_streak": r["empty_streak"],
+                "last_success_at": r["last_success_at"].isoformat() if r["last_success_at"] else None,
+                "last_probe_ok": r["last_probe_ok"],
+            })
+
+    return {
+        "schema_version": 1,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "overview": overview,
+        "sources": {
+            "total": len(sh_rows),
+            "by_circuit_state": by_state,
+            "problems": problems,
+        },
+        "maintenance": {
+            "active": settings.maintenance_mode,
+            "note": settings.maintenance_note,
+        },
+    }
@@ -21,12 +21,14 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession

 from core.auth import get_current_user
+from core.config import settings
 from core.database import get_session
 from models.study_memo_card import StudyMemoCard, StudyMemoCardEvidence, record_card_view
 from models.study_memo_card_progress import StudyMemoCardProgress, rate_card
 from models.study_question import StudyQuestion
 from models.user import User
 from services.study.card_normalize import compute_dedup_hash
+from services.study.publish_enqueue import enqueue_card_progress_publish, enqueue_card_publish

 router = APIRouter()

@@ -248,9 +250,18 @@ async def approve_batch(
            StudyMemoCard.needs_review,
        )
        .values(needs_review=False, flagged_by=None, flagged_at=None)
+        .returning(StudyMemoCard.id)
    )
+    approved_ids = list(result.scalars().all())
+    # 방금 검수완료된 카드 발행(같은 tx, flag off 면 no-op). S-2.
+    if settings.study_publish_enabled and approved_ids:
+        cards = (
+            await session.execute(select(StudyMemoCard).where(StudyMemoCard.id.in_(approved_ids)))
+        ).scalars().all()
+        for c in cards:
+            await enqueue_card_publish(session, c)
    await session.commit()
-    return {"approved": result.rowcount or 0}
+    return {"approved": len(approved_ids)}


 # ─── 복습(SR) 트랙 ───
@@ -310,6 +321,9 @@ async def rate(
    if outcome is None:
        raise HTTPException(status_code=422, detail=f"invalid outcome: {body.outcome!r}")
    progress = await rate_card(session, card=card, outcome=outcome, now=datetime.now(timezone.utc))
+    # 카드 SR 상태 발행(같은 tx, flag off=no-op) — ALL row(sentinel/terminal 포함). S-4.
+    if settings.study_publish_enabled:
+        await enqueue_card_progress_publish(session, progress)
    await session.commit()
    return RateResult(
        card_id=card.id, outcome=outcome, review_stage=progress.review_stage, due_at=progress.due_at
@@ -392,6 +406,9 @@ async def update_card(
            card.flagged_by = None
            card.flagged_at = None

+    # 발행 재투영/tombstone(같은 tx) — 검수완료=발행·검수대기복귀=tombstone(상태 기반). S-2.
+    if settings.study_publish_enabled:
+        await enqueue_card_publish(session, card)
    try:
        await session.commit()
    except IntegrityError:
@@ -414,4 +431,7 @@ async def delete_card(
    card = await session.get(StudyMemoCard, card_id)
    card = _verify_card(card, user)
    card.deleted_at = datetime.now(timezone.utc)
+    # 발행 tombstone(같은 tx) — 삭제는 feed 1급 이벤트. S-2.
+    if settings.study_publish_enabled:
+        await enqueue_card_publish(session, card)
    await session.commit()
@@ -39,6 +39,9 @@ from services.study.explanation_rag import (
    gather_explanation_context,
    render_evidence_block,
 )
+from services.study.publish_enqueue import enqueue_publish, enqueue_question_publish
+from services.study.publish_projection import KIND_CARD, KIND_EXPLANATION, KIND_QUESTION
+from services.study.outcome import derive_outcome

 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -543,6 +546,9 @@ async def create_question_in_topic(
    )
    session.add(q)
    await session.flush()
+    # 발행 outbox 적재(같은 tx, flag off 면 no-op) — 신규 문항 발행. P0-1b.
+    if settings.study_publish_enabled:
+        await enqueue_question_publish(session, q)
    await session.commit()

    stats = QuestionAttemptStats(attempt_count=0, correct_count=0, wrong_count=0)
@@ -905,9 +911,16 @@ async def update_question(
    # 카드는 '구' ai_explanation 에서 추출됐으므로 정정 후 stale 가능 — 즉시 가시화 플래그.
    # 최종 stale 정리는 card_extract 워커의 supersede 가 책임(새 버전 추출 시 구버전 retire).
    if AI_STALE_TRIGGER & fields_set:
-        await flag_cards_for_source(session, source_question_id=q.id, reason="source_changed")
+        flagged_card_ids = await flag_cards_for_source(session, source_question_id=q.id, reason="source_changed")
+        # 발행 자격 잃은(검수대기 복귀) 파생 카드 tombstone(같은 tx). S-2.
+        if settings.study_publish_enabled:
+            for cid in flagged_card_ids:
+                await enqueue_publish(session, kind=KIND_CARD, source_id=cid, payload=None, deleted=True)

    q.updated_at = datetime.now(timezone.utc)
+    # 발행 재투영(같은 tx) — 문항 갱신 반영. 해설은 ready 일 때만 동봉, stale→tombstone 은 P1-3. P0-1b.
+    if settings.study_publish_enabled:
+        await enqueue_question_publish(session, q)
    await session.commit()

    stats = await _attempt_stats(session, user.id, question_id)
@@ -970,7 +983,16 @@ async def soft_delete_question(
    )
    # 공부 암기노트: 소스 문제 삭제 시 파생 암기카드를 검토 대기로 마킹(source_deleted).
    # study_questions 는 soft-delete 만이라 카드 FK CASCADE 는 미발동 — 이 훅이 실 경로.
-    await flag_cards_for_source(session, source_question_id=q.id, reason="source_deleted")
+    flagged_card_ids = await flag_cards_for_source(session, source_question_id=q.id, reason="source_deleted")
+    # 발행 자격 잃은 파생 카드 tombstone(같은 tx). S-2.
+    if settings.study_publish_enabled:
+        for cid in flagged_card_ids:
+            await enqueue_publish(session, kind=KIND_CARD, source_id=cid, payload=None, deleted=True)
+    # 발행 tombstone(같은 tx) — 삭제는 feed 1급 이벤트(raw DELETE 금지·워커 경유). 해설 본문 있으면 그 kind 도. P0-1b.
+    if settings.study_publish_enabled:
+        await enqueue_publish(session, kind=KIND_QUESTION, source_id=q.id, payload=None, deleted=True)
+        if q.ai_explanation:
+            await enqueue_publish(session, kind=KIND_EXPLANATION, source_id=q.id, payload=None, deleted=True)
    await session.commit()


@@ -992,19 +1014,13 @@ async def submit_attempt(
    q = await session.get(StudyQuestion, question_id)
    q = _verify_question_ownership(q, user)

-    if body.is_unsure:
-        selected = None
-        is_correct = False
-        outcome = "unsure"
-    elif body.selected_choice is None:
-        raise HTTPException(
-            status_code=422,
-            detail="selected_choice (1~4) 또는 is_unsure=true 가 필요합니다",
+    # 채점 단일 소스 — 뷰어 ingest 와 동일 함수(P2). 선택 없고 unsure 아니면 422.
+    try:
+        selected, is_correct, outcome = derive_outcome(
+            body.selected_choice, body.is_unsure, q.correct_choice
        )
-    else:
-        selected = body.selected_choice
-        is_correct = selected == q.correct_choice
-        outcome = "correct" if is_correct else "wrong"
+    except ValueError as e:
+        raise HTTPException(status_code=422, detail=str(e))

    # PR-10: 세션 연동. 기본은 None.
    quiz_session: StudyQuizSession | None = None
@@ -1543,8 +1559,8 @@ async def delete_question_image(

 # ─── PR-3: AI 풀이 생성 엔드포인트 ───

-# MLX 호출 timeout (초). MLX gate + 26B 추론 평균 ~10s, 안전 마진.
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s
 # 프롬프트 템플릿 lazy load
 _PROMPT_PATH = "study_question_explanation.txt"
 _prompt_cache: str | None = None
@@ -1713,6 +1729,9 @@ async def generate_ai_explanation(
    primary_name = ai_client.ai.primary.model if hasattr(ai_client.ai.primary, "model") else "primary"
    q.ai_explanation_model = f"mlx:{primary_name}"
    q.updated_at = q.ai_explanation_generated_at
+    # 발행 재투영(같은 tx) — 실시간 해설 ready → 문항+해설 발행. P0-1b.
+    if settings.study_publish_enabled:
+        await enqueue_question_publish(session, q)
    await session.commit()

    return AIExplanationResponse(
@@ -33,6 +33,7 @@ from ai.client import AIClient, strip_thinking
 from eid.ai import EidAIClient
 from eid.compose import compose
 from core.auth import get_current_user
+from core.config import settings
 from core.database import get_session
 from core.library import LIBRARY_PREFIX, normalize_library_path
 from models.document import Document
@@ -46,6 +47,8 @@ from models.eid_study_weakness import EidStudyWeakness
 from models.eid_review_set_draft import EidReviewSetDraft
 from models.user import User
 from services.search.llm_gate import Priority, acquire_mlx_gate
+from services.study.publish_enqueue import enqueue_publish, enqueue_topic_publish
+from services.study.publish_projection import KIND_TOPIC
 from services.study.subject_note_rag import (
    SubjectNoteContext,
    gather_subject_note_context,
@@ -466,6 +469,9 @@ async def create_study_topic(
    session.add(topic)
    try:
        await session.flush()
+        # 발행 outbox 적재(같은 tx, flag off 면 no-op) — 신규 주제 발행. S-1.
+        if settings.study_publish_enabled:
+            await enqueue_topic_publish(session, topic)
        await session.commit()
    except IntegrityError:
        await session.rollback()
@@ -695,6 +701,10 @@ async def update_study_topic(
        topic.focused_at = datetime.now(timezone.utc) if body.focused else None

    topic.updated_at = datetime.now(timezone.utc)
+    # 발행 재투영(같은 tx) — 주제 메타 갱신 반영. payload(name·exam_round_size) 무변경(focused 등)
+    # 은 워커 (payload_hash, deleted) 디둡이 rev 안 올리고 흡수 = churn 없음. S-1.
+    if settings.study_publish_enabled:
+        await enqueue_topic_publish(session, topic)
    try:
        await session.commit()
    except IntegrityError:
@@ -770,6 +780,9 @@ async def delete_study_topic(
    )

    topic.deleted_at = datetime.now(timezone.utc)
+    # 발행 tombstone(같은 tx) — 삭제는 feed 1급 이벤트(raw DELETE 금지·워커 경유). S-1.
+    if settings.study_publish_enabled:
+        await enqueue_publish(session, kind=KIND_TOPIC, source_id=topic.id, payload=None, deleted=True)
    await session.commit()


@@ -1015,7 +1028,7 @@ async def detach_session_from_topic(

 # ─── PR-9: 분야 설명 (study_topic_subject_notes) ───

-SUBJECT_NOTE_TIMEOUT_S = 30.0
+SUBJECT_NOTE_TIMEOUT_S = settings.llm_call_timeout_s
 _SUBJECT_NOTE_PROMPT_PATH = "study_subject_note.txt"
 _subject_note_prompt_cache: str | None = None

@@ -1242,7 +1255,7 @@ async def generate_subject_note(
 # 워커(study_weakness)가 산출한 최신 eid_study_weakness 스냅샷을 '학습 진단 코치'(study overlay)
 # 로 번역. 약점/태도 '판정'은 코드 derived(스냅샷) — LLM 은 스냅샷 블록 값만 인용(환각 약점 차단).
 # compose("study_diagnosis") = persona+rules+study overlay(+{placeholder}) → 표면이 블록 substitute.
-DIAGNOSIS_TIMEOUT_S = 40.0
+DIAGNOSIS_TIMEOUT_S = settings.llm_call_timeout_s


 class StudyDiagnosisResponse(BaseModel):
@@ -30,6 +30,11 @@ class AIModelConfig(BaseModel):
    # None = MLX/OpenAI server default. Anthropic branch 는 미적용 (별 plan 범위).
    temperature: float | None = None
    top_p: float | None = None
+    # mlx 네이티브 샘플링 — 한국어 장문 코드스위칭(CJK/라틴 누수)·반복루프 억제용.
+    # Qwen3 권장: top_k=20, repetition_penalty 1.05~1.1. None = 서버 기본값(주입 안 함).
+    # OpenAI 호환 분기(mlx)만 적용 — Anthropic 분기는 미적용(별 범위).
+    repetition_penalty: float | None = None
+    top_k: int | None = None


 class DeepSummaryBacklogConfig(BaseModel):
@@ -176,16 +181,29 @@ class Settings(BaseModel):
    digest_llm_timeout_s: int = 200
    digest_llm_attempts: int = 2
    digest_pipeline_hard_cap_s: int = 1800
+    # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준,
+    # Qwen 27B 교체 sweep 누락 → 사용자 대면 504 + 워커 영구 stuck). digest 와 동형 단일소스.
+    llm_call_timeout_s: int = 200

    # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
    # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거.
    study_explanation_enabled: bool = True
    # 공부 암기노트 Phase 1: card_extract 폴러/consumer 게이트. owner 분리 시 false 로.
    study_card_extract_enabled: bool = True
+    # 발행 레이어(docsrv-viewer-publish): publish_outbox 워커 게이트. 저자/4-A enqueue 결선(P0-1b) 후 true.
+    study_publish_enabled: bool = False
+    digest_publish_enabled: bool = False  # docsrv-viewer-publish P1-1 (뉴스/다이제스트 발행 feed gate)
+    maintenance_mode: bool = False  # P1-4: 점검/실험 중 = 가공현황 배너(표면 != 데이터)
+    maintenance_note: str = ""
+    # 뷰어 write-back ingest(study-to-viewer P2) 게이트. /ingest/study/attempts 활성. 기본 false=inert(503).
+    study_ingest_enabled: bool = False

    # internal endpoint Bearer token (Mac mini derived-worker 호출용)
    internal_worker_token: str = ""

+    # 뷰어↔DS 발행 채널 Bearer token (publish read API P0-2 + ingest P2). Mac mini 토큰과 분리(폭발반경 격리).
+    viewer_sync_token: str = ""
+

 def load_settings() -> Settings:
    """config.yaml + 환경변수에서 설정 로딩"""
@@ -193,7 +211,13 @@ def load_settings() -> Settings:
    database_url = os.getenv("DATABASE_URL", "")
    study_explanation_enabled = os.getenv("STUDY_EXPLANATION_ENABLED", "true").lower() in ("1", "true", "yes")
    study_card_extract_enabled = os.getenv("STUDY_CARD_EXTRACT_ENABLED", "true").lower() in ("1", "true", "yes")
+    study_publish_enabled = os.getenv("STUDY_PUBLISH_ENABLED", "false").lower() in ("1", "true", "yes")
+    digest_publish_enabled = os.getenv("DIGEST_PUBLISH_ENABLED", "false").lower() in ("1", "true", "yes")
+    maintenance_mode = os.getenv("MAINTENANCE_MODE", "false").lower() in ("1", "true", "yes")
+    maintenance_note = os.getenv("MAINTENANCE_NOTE", "")
+    study_ingest_enabled = os.getenv("STUDY_INGEST_ENABLED", "false").lower() in ("1", "true", "yes")
    internal_worker_token = os.getenv("INTERNAL_WORKER_TOKEN", "")
+    viewer_sync_token = os.getenv("VIEWER_SYNC_TOKEN", "")
    jwt_secret = os.getenv("JWT_SECRET", "")
    totp_secret = os.getenv("TOTP_SECRET", "")
    eval_runner_token = os.getenv("EVAL_RUNNER_TOKEN", "")
@@ -268,6 +292,7 @@ def load_settings() -> Settings:
    digest_llm_timeout_s = 200
    digest_llm_attempts = 2
    digest_pipeline_hard_cap_s = 1800
+    llm_call_timeout_s = 200
    if config_path.exists() and raw and "pipeline" in raw:
        held_raw = (raw.get("pipeline") or {}).get("held_stages") or []
        # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용.
@@ -293,6 +318,10 @@ def load_settings() -> Settings:
            digest_pipeline_hard_cap_s = max(60, int(_pl.get("digest_pipeline_hard_cap_s", 1800)))
        except (TypeError, ValueError):
            digest_pipeline_hard_cap_s = 1800
+        try:
+            llm_call_timeout_s = max(1, int(_pl.get("llm_call_timeout_s", 200)))
+        except (TypeError, ValueError):
+            llm_call_timeout_s = 200

    taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
    document_types = raw.get("document_types", []) if config_path.exists() and raw else []
@@ -321,12 +350,19 @@ def load_settings() -> Settings:
        upload=upload_cfg,
        study_explanation_enabled=study_explanation_enabled,
        study_card_extract_enabled=study_card_extract_enabled,
+        study_publish_enabled=study_publish_enabled,
+        digest_publish_enabled=digest_publish_enabled,
+        maintenance_mode=maintenance_mode,
+        maintenance_note=maintenance_note,
+        study_ingest_enabled=study_ingest_enabled,
        internal_worker_token=internal_worker_token,
+        viewer_sync_token=viewer_sync_token,
        pipeline_held_stages=pipeline_held_stages,
        mlx_gate_concurrency=mlx_gate_concurrency,
        digest_llm_timeout_s=digest_llm_timeout_s,
        digest_llm_attempts=digest_llm_attempts,
        digest_pipeline_hard_cap_s=digest_pipeline_hard_cap_s,
+        llm_call_timeout_s=llm_call_timeout_s,
    )


@@ -57,12 +57,12 @@ def _parse_migration_files(migrations_dir: Path) -> list[tuple[int, str, Path]]:

 def _validate_sql_content(name: str, sql: str) -> None:
    """migration SQL에 BEGIN/COMMIT이 포함되어 있으면 에러 (외부 트랜잭션 깨짐 방지)"""
-    # 주석(-- ...) 라인 제거 후 검사
-    lines = [
-        line for line in sql.splitlines()
-        if not line.strip().startswith("--")
-    ]
-    stripped = "\n".join(lines).upper()
+    # 주석(전체 줄 + 인라인 `-- ...`) 제거 후 검사. ★인라인 주석을 안 지우면 설명 주석의
+    # 'commit/begin' 단어(예 365_scan_jobs 의 `-- commit 시 documents.title 로 전파`)를
+    # 트랜잭션 제어문으로 false-positive 로 잡아 fresh DB/DR 부트스트랩이 깨진다(verification
+    # 실측 2026-06). 줄별로 `--` 이후를 잘라 주석 텍스트를 검사에서 제외.
+    cleaned = [re.sub(r"--.*$", "", line) for line in sql.splitlines()]
+    stripped = "\n".join(cleaned).upper()
    for keyword in ("BEGIN", "COMMIT", "ROLLBACK"):
        # 단어 경계로 매칭 (예: BEGIN_SOMETHING은 제외)
        if re.search(rf"\b{keyword}\b", stripped):
@@ -70,6 +70,13 @@ def _validate_sql_content(name: str, sql: str) -> None:
                f"migration {name}에 {keyword} 포함됨 — "
                f"migration SQL에는 트랜잭션 제어문을 넣지 마세요"
            )
+    # schema_migrations 수정 금지 (runner 가 스탬프 관리) — 주석 제외(stripped) 검사.
+    # (구: _run_migrations 의 raw `"schema_migrations" in sql.lower()` 가 주석 미제외라
+    #  365 의 '-- ... schema_migrations 를 건드리지 않음' 주석을 false-positive 로 잡았음.)
+    if "SCHEMA_MIGRATIONS" in stripped:
+        raise RuntimeError(
+            f"Migration {name} must not modify schema_migrations table"
+        )


 # R1: baseline 스냅샷이 대표하는 마지막 마이그레이션 버전 (이하 버전은 baseline 에 포함).
@@ -167,16 +174,15 @@ async def _run_migrations(conn) -> None:

    for version, name, path in pending:
        sql = path.read_text(encoding="utf-8")
-        _validate_sql_content(name, sql)
-        if "schema_migrations" in sql.lower():
-            raise ValueError(
-                f"Migration {name} must not modify schema_migrations table"
-            )
+        _validate_sql_content(name, sql)  # BEGIN/COMMIT + schema_migrations 검사(주석 제외)
        logger.info(f"[migration] {name} 실행 중...")
-        # raw driver SQL 사용 — text() 의 :name bind parameter 해석으로
-        # SQL 주석/literal 에 콜론이 들어가면 InvalidRequestError 발생.
-        # exec_driver_sql 은 SQL 을 driver(asyncpg) 에 그대로 전달.
-        await conn.exec_driver_sql(sql)
+        # raw asyncpg simple 프로토콜로 실행 — baseline 적재(_load_baseline_if_fresh)와 동일.
+        # ★exec_driver_sql 은 prepared 프로토콜이라 multi-statement 불허("cannot insert multiple
+        #  commands into a prepared statement"). 365_scan_jobs 처럼 테이블+시드+인덱스를 한 파일에
+        #  담은 마이그(컨벤션상 1-statement 권장이나 이미 prod 적재)도 fresh DB/DR replay 되게
+        #  simple execute 사용. text() :name 콜론-binding 이슈도 동일하게 회피(raw 전달).
+        raw = await conn.get_raw_connection()
+        await raw.driver_connection.execute(sql)
        await conn.execute(
            text("INSERT INTO schema_migrations (version, name) VALUES (:v, :n)"),
            {"v": version, "n": name},
@@ -2,6 +2,7 @@

 import hashlib
 import logging
+from logging.handlers import RotatingFileHandler
 from pathlib import Path


@@ -13,7 +14,9 @@ def setup_logger(name: str, log_dir: str = "logs") -> logging.Logger:

    if not logger.handlers:
        # 파일 핸들러
-        fh = logging.FileHandler(f"{log_dir}/{name}.log", encoding="utf-8")
+        fh = RotatingFileHandler(
+            f"{log_dir}/{name}.log", maxBytes=10 * 1024 * 1024, backupCount=3, encoding="utf-8"
+        )
        fh.setFormatter(logging.Formatter(
            "%(asctime)s [%(levelname)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
@@ -9,6 +9,8 @@ from sqlalchemy import func, select, text
 from api.audio import router as audio_router
 from api.internal_study import router as internal_study_router
 from api.internal_worker import router as internal_worker_router
+from api.published import router as published_router
+from api.ingest_study import router as ingest_study_router
 from api.auth import router as auth_router
 from api.briefing import router as briefing_router
 from api.config import router as config_router
@@ -70,6 +72,7 @@ async def lifespan(app: FastAPI):
    from workers.study_session_queue_consumer import consume_study_session_queue
    from workers.study_memo_card_jobs_consumer import consume_study_memo_card_queue
    from workers.study_card_enqueue import run as study_card_enqueue_run
+    from workers.study_publish_worker import consume_publish_outbox
    from workers.study_reminder import run as study_reminder_run
    from workers.study_weakness import run as study_weakness_run
    from workers.study_question_embed_worker import (
@@ -84,6 +87,13 @@ async def lifespan(app: FastAPI):
    # 시작: DB 연결 확인
    await init_db()

+    # 2026-06-20: JWT_SECRET 빈값 fail-loud — credentials.env 미로드/누락 시 빈 키로 전 토큰
+    # 서명하며 부팅하던 침묵 인증붕괴 차단 (totp_secret 은 per-user 라 미가드).
+    if not settings.jwt_secret:
+        raise RuntimeError(
+            "JWT_SECRET 미설정 — 빈 키 서명 방지. credentials.env / 환경변수 확인."
+        )
+
    # NAS 마운트 확인 (NFS 미마운트 시 로컬 빈 디렉토리에 쓰는 것 방지)
    from pathlib import Path
    nas_check = Path(settings.nas_mount_path) / "PKM"
@@ -94,7 +104,12 @@ async def lifespan(app: FastAPI):
        )

    # APScheduler: 백그라운드 작업
-    scheduler = AsyncIOScheduler(timezone="Asia/Seoul")
+    scheduler = AsyncIOScheduler(
+        timezone="Asia/Seoul",
+        # 2026-06-20 H4: 기본 misfire_grace_time=1s 는 단일 asyncio 루프가 1초만 혼잡해도
+        # 1분 컨슈머 틱을 run time missed 로 침묵 스킵(에러·failed row 0). 45s 완화 + coalesce.
+        job_defaults={"misfire_grace_time": 45, "coalesce": True, "max_instances": 1},
+    )
    # 상시 실행
    scheduler.add_job(consume_queue, "interval", minutes=1, id="queue_consumer")
    # PR-DocSrv-Markdown-Consumer-Split-1: markdown(marker) 전용 consumer.
@@ -128,6 +143,9 @@ async def lifespan(app: FastAPI):
    # 별 테이블/별 consumer 로 기존 study queue 와 격리. settings.study_card_extract_enabled 게이트.
    scheduler.add_job(consume_study_memo_card_queue, "interval", minutes=1, id="study_memo_card_consumer")
    scheduler.add_job(study_card_enqueue_run, "interval", minutes=1, id="study_card_enqueue")
+    # 발행 레이어(docsrv-viewer-publish): publish_outbox drain → published rev 부여.
+    # study_publish_enabled=false(기본) 면 worker 내부 no-op. 단일 라이터(pg_advisory_xact_lock) max_instances=1.
+    scheduler.add_job(consume_publish_outbox, "interval", minutes=1, id="publish_outbox_consumer", max_instances=1)
    # PR-B 레거시 tier 백필 — 30분 주기로 호출되지만 KST 00:00~06:00 시간대만 실제 enqueue.
    # safety > law > manual 우선순위로 25건씩. 6720 레거시 → 야간당 ~150건 → 약 45일 소화.
    scheduler.add_job(tier_backfill_run, "interval", minutes=30, id="tier_backfill")
@@ -144,7 +162,7 @@ async def lifespan(app: FastAPI):
    scheduler.add_job(study_reminder_run, CronTrigger(hour="9,13,19", timezone=KST), id="study_reminder")
    # 이드 W3-2: 공부중 토픽 약점 derived 스냅샷 (nightly 04:30 KST, LLM 0). study_diagnosis 표면 source.
    scheduler.add_job(study_weakness_run, CronTrigger(hour=4, minute=30, timezone=KST), id="study_weakness")
-    scheduler.add_job(news_collector_run, "interval", hours=6, id="news_collector")
+    scheduler.add_job(news_collector_run, CronTrigger(hour="0,6,12,18", timezone=KST), id="news_collector")
    # crawl-24x7 A-2 안전망: fulltext 영구 실패(3회 소진) 문서를 RSS 요약 기준으로
    # 후속 enqueue (silent skip 누적 방지). 03:40 = dedup_reconcile(03:30) 직후 비충돌 슬롯.
    scheduler.add_job(fulltext_reconcile_run, CronTrigger(hour=3, minute=40, timezone=KST), id="fulltext_reconcile")
@@ -220,6 +238,8 @@ app.include_router(briefing_router, prefix="/api/briefing", tags=["briefing"])
 app.include_router(audio_router, prefix="/api/audio", tags=["audio"])
 app.include_router(internal_study_router, prefix="/internal/study", tags=["internal-study"])
 app.include_router(internal_worker_router, prefix="/internal/worker", tags=["internal-worker"])
+app.include_router(published_router, prefix="/published", tags=["published"])
+app.include_router(ingest_study_router, prefix="/ingest/study", tags=["ingest-study"])
 app.include_router(video_router, prefix="/api/video", tags=["video"])
 app.include_router(study_sessions_router, prefix="/api/study-sessions", tags=["study-sessions"])
 app.include_router(study_topics_router, prefix="/api/study-topics", tags=["study-topics"])
@@ -41,6 +41,14 @@ class Document(Base):
        Integer, nullable=False, default=0, server_default="0"
    )

+    # G2 pre-segmentation (migration 362): 번들 PDF → N 자식 분할.
+    # presegment_role: NULL=일반 단일문서 / 'parent'=번들원본(자체 extract/embed 안 함) /
+    #   'child'=논리 하위문서(부모 file_path 공유 + bundle_page_start/end 1-based inclusive 범위).
+    # 부모-자식 관계 자체는 document_lineage(relation_type='segmented_from').
+    bundle_page_start: Mapped[int | None] = mapped_column(Integer)
+    bundle_page_end: Mapped[int | None] = mapped_column(Integer)
+    presegment_role: Mapped[str | None] = mapped_column(Text)
+
    # 2계층: 텍스트 추출
    extracted_text: Mapped[str | None] = mapped_column(Text)
    extracted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
@@ -0,0 +1,31 @@
+"""document_lineage 테이블 ORM — 문서 파생 관계 이력 (migration 217).
+
+G2 pre-segmentation 이 relation_type='segmented_from'(번들 → 자식) 으로 사용 (migration 363).
+이력 테이블 FK = ON DELETE RESTRICT (부모 hard delete 차단, soft delete 만 허용).
+"""
+from datetime import datetime
+
+from sqlalchemy import BigInteger, ForeignKey, Text, func
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy.types import TIMESTAMP
+
+from core.database import Base
+
+
+class DocumentLineage(Base):
+    __tablename__ = "document_lineage"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    source_document_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("documents.id", ondelete="RESTRICT"), nullable=False
+    )
+    derived_document_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("documents.id", ondelete="RESTRICT"), nullable=False
+    )
+    relation_type: Mapped[str] = mapped_column(Text, nullable=False)
+    # 'metadata' 는 SQLAlchemy 예약속성 → Python 속성명은 meta, DB 컬럼명은 metadata.
+    meta: Mapped[dict] = mapped_column(
+        "metadata", JSONB, nullable=False, default=dict, server_default="{}"
+    )
+    created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
@@ -0,0 +1,60 @@
+"""발행 레이어 ORM (docsrv-viewer-publish) — published projection + publish_outbox.
+
+관계(relationship) 없음 = 독립 테이블, configure_mappers 무영향. 마이그 367~372.
+  published      = 뷰어가 read API(P0-2)로 당기는 render-ready projection(kind-discriminated).
+  publish_outbox = 저작/4-A 트랜잭션이 같은 tx에서 INSERT, 발행 워커가 drain 하며 rev 부여.
+
+불변식(plan study-to-viewer-slice1):
+  pub_id opaque+stable = dedup키 = progress키 / rev = 워커 커밋순 gapless(pg_advisory_lock 단일 라이터)
+  / (payload_hash, deleted) 디둡 / 삭제 = tombstone(deleted=true) / schema_version = 엔벨로프 버전.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from sqlalchemy import BigInteger, Boolean, DateTime, SmallInteger, String, Text
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core.database import Base
+
+
+class Published(Base):
+    __tablename__ = "published"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    kind: Mapped[str] = mapped_column(String(40), nullable=False)
+    source_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    pub_id: Mapped[str] = mapped_column(Text, nullable=False)
+    payload: Mapped[dict] = mapped_column(JSONB, nullable=False)
+    payload_hash: Mapped[str] = mapped_column(Text, nullable=False)
+    schema_version: Mapped[int] = mapped_column(SmallInteger, nullable=False, default=1)
+    rev: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now, nullable=False
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now, nullable=False
+    )
+
+    # UNIQUE(kind, pub_id)=mig368, UNIQUE(kind, source_id)=mig369, idx(rev)=mig370.
+
+
+class PublishOutbox(Base):
+    __tablename__ = "publish_outbox"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    kind: Mapped[str] = mapped_column(String(40), nullable=False)
+    source_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    payload: Mapped[dict] = mapped_column(JSONB, nullable=False)
+    payload_hash: Mapped[str] = mapped_column(Text, nullable=False)
+    schema_version: Mapped[int] = mapped_column(SmallInteger, nullable=False, default=1)
+    deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now, nullable=False
+    )
+    processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+
+    # 미처리 부분 인덱스 idx(id) WHERE processed_at IS NULL = mig372.
@@ -46,9 +46,10 @@ class ProcessingQueue(Base):
        # 'stt' (audio): migration 150 / 'thumbnail' (video): queue_consumer 가 enqueue.
        # 'deep_summary' (PR-B B-1): classify_worker 가 에스컬레이션 시 enqueue.
        # 'fulltext' (crawl-24x7 A-2): migration 321 — 기사 페이지 fetch 후 본문 승격.
+        # 'presegment' (G2): migration 364 — extract 前 번들 PDF → N 자식 분할.
        # DB enum 변경은 마이그레이션이 처리하므로 create_type=False.
        Enum(
-            "extract", "classify", "summarize", "embed", "chunk", "preview",
+            "presegment", "extract", "classify", "summarize", "embed", "chunk", "preview",
            "stt", "thumbnail", "deep_summary", "markdown", "fulltext",
            name="process_stage",
            create_type=False,
@@ -25,6 +25,7 @@ from sqlalchemy import (
    String,
    Text,
    func,
+    select,
    text,
    update,
 )
@@ -99,13 +100,25 @@ async def supersede_old_cards(
    *,
    source_question_id: int,
    keep_generated_at: datetime | None,
-) -> int:
+) -> list[int]:
    """같은 문제의 '다른 버전' 카드를 deleted_at 마킹(retire).

    새 source_generated_at 카드 적재 '전에' 호출 — 살아있는 구버전 카드가 dedup PARTIAL
    UNIQUE 로 새 추출을 막는 것을 방지(정정-후 stale 잔류 0). 같은 버전은 보존.
-    Returns: retire 된 행 수.
+    Returns: retire 되며 '발행 중이던'(needs_review=False) 카드 id 목록 — 발행 tombstone
+      대상(호출측이 enqueue). 검수 안 됐던(미발행) retire 카드는 tombstone 불요라 제외.
    """
+    # 발행 중이던 retire 대상 선캡처(update 전) — 미발행 카드 스푸리어스 tombstone 회피.
+    published_retired = (
+        await session.execute(
+            select(StudyMemoCard.id).where(
+                StudyMemoCard.source_question_id == source_question_id,
+                StudyMemoCard.deleted_at.is_(None),
+                StudyMemoCard.source_generated_at.is_distinct_from(keep_generated_at),
+                StudyMemoCard.needs_review.is_(False),
+            )
+        )
+    ).scalars().all()
    stmt = (
        update(StudyMemoCard)
        .where(
@@ -115,8 +128,8 @@ async def supersede_old_cards(
        )
        .values(deleted_at=func.now())
    )
-    result = await session.execute(stmt)
-    return result.rowcount or 0
+    await session.execute(stmt)
+    return list(published_retired)


 async def append_card(
@@ -216,13 +229,24 @@ async def flag_cards_for_source(
    *,
    source_question_id: int,
    reason: str,
-) -> int:
+) -> list[int]:
    """소스 문제 정정/삭제 시 파생 카드를 needs_review=auto 마킹(임시 플래그).

    최종 stale 정리는 워커 supersede 가 책임 — 이건 사용자 가시화용 즉시 플래그.
    reason: 'source_changed' | 'source_deleted'.
-    Returns: 마킹된 행 수.
+    Returns: 플래그로 '발행 자격을 잃은'(직전 needs_review=False) 카드 id 목록 — 발행
+      tombstone 대상(호출측 enqueue). 이미 검수대기였던(미발행) 카드는 제외.
    """
+    # 발행 중이던 카드 선캡처(update 전) — 플래그로 needs_review=True 가 되면 발행 자격 상실.
+    published_ids = (
+        await session.execute(
+            select(StudyMemoCard.id).where(
+                StudyMemoCard.source_question_id == source_question_id,
+                StudyMemoCard.deleted_at.is_(None),
+                StudyMemoCard.needs_review.is_(False),
+            )
+        )
+    ).scalars().all()
    stmt = (
        update(StudyMemoCard)
        .where(
@@ -231,5 +255,5 @@ async def flag_cards_for_source(
        )
        .values(needs_review=True, flagged_by=reason, flagged_at=func.now())
    )
-    result = await session.execute(stmt)
-    return result.rowcount or 0
+    await session.execute(stmt)
+    return list(published_ids)
@@ -50,6 +50,10 @@ class StudyQuizSession(Base):
    chronic_remaining_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    # study-to-viewer P2: 뷰어 ingest 멱등/출처. 라이브 세션=finalized_at·client_session_uuid NULL, source='live'.
+    finalized_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))  # 멱등 마커(mig 373)
+    client_session_uuid: Mapped[str | None] = mapped_column(String(64))  # 뷰어 세션 UUID(mig 374, uq mig376)
+    source: Mapped[str] = mapped_column(String(20), nullable=False, default="live")  # live|viewer(mig 375)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now, nullable=False
    )
@@ -0,0 +1,41 @@
+You are a document-boundary detector. Output ONLY JSON {is_bundle, segments:[{start_page,end_page,title}]}.
+
+You are given a single PDF that may be a "bundle" — several independent logical documents
+concatenated into one file (for example: multiple laws, multiple reports, or multiple papers
+scanned together). Your job is to decide whether it is a bundle and, if so, where each logical
+document starts and ends.
+
+You receive only a compact sample per page: the page number and the first line / heading of that
+page (text may be truncated). Use these heading/first-line signals to detect where a new logical
+document begins (a new title page, a new cover, a clearly new document title, a restart of
+numbering, etc.). You do NOT receive the full text.
+
+Output rules:
+- Respond with STRICT JSON only. No prose, no markdown, no code fence.
+- Schema:
+  {
+    "is_bundle": true | false,
+    "segments": [
+      {"start_page": <int>, "end_page": <int>, "title": "<string or null>"}
+    ]
+  }
+- Page numbers are 1-based and INCLUSIVE. start_page=1 is the first page; end_page equals the last
+  page of that segment.
+- Segments MUST fully cover every page with NO gaps and NO overlaps:
+  - the first segment MUST start at page 1,
+  - each next segment MUST start exactly one page after the previous segment's end_page,
+  - the last segment MUST end at the final page (page_count).
+- Order segments by start_page ascending.
+- title = a short title for that logical document if you can infer one from its first page,
+  otherwise null.
+
+If the file is NOT a bundle (it is a single logical document), respond:
+  {"is_bundle": false, "segments": []}
+
+Be conservative: only report is_bundle=true when the heading signals clearly indicate separate
+logical documents. When unsure, return is_bundle=false.
+
+page_count: {page_count}
+
+Per-page samples (one per line, "p{n}: {first line}"):
+{page_samples}
@@ -42,6 +42,7 @@ _NEWS_WINDOW_SQL = text(f"""
      AND d.created_at < :window_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      AND length(d.ai_summary) > 0
      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (digest 와 동일 공유 술어, 경로 일관성)
      AND {restricted_exclude_sql("d")}
 """)
@@ -66,6 +67,7 @@ _HISTORICAL_CANDIDATES_SQL = text(f"""
      AND d.created_at < :hist_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      AND length(d.ai_summary) > 0
      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (공유 술어)
      AND {restricted_exclude_sql("d")}
 """)
@@ -42,6 +42,7 @@ _NEWS_WINDOW_SQL = text(f"""
      AND d.created_at < :window_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      AND length(d.ai_summary) > 0
      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (모든 경로 공유 술어 = license_filter).
      -- news 채널엔 현재 restricted 부재 = 방어적 게이트(미래 유료 news 소스 대비, 경로 누락 방지).
      AND {restricted_exclude_sql("d")}
@@ -42,6 +42,21 @@ _ENG = re.compile(
 _FENCE = re.compile(r'^\s{0,3}(```|~~~)')


+# ASME 절 식별자 (A-1): UG-79 · PG-27.4.1 · UW-11 · UCS-56 · A-69 · PFT-14
+# (대문자 1~4 + 하이픈 + 숫자[.숫자]*). _detect_heading 의 ATX 분기에서 node_type='clause' 판정에 사용.
+# 한국 법령(제N조)은 _KO_JO 가 별도 처리 — 본 패턴/정제와 무관(무회귀).
+_ASME_CLAUSE = re.compile(r'^[A-Z]{1,4}-\d+(?:\.\d+)*\b')
+
+
+def _clean_label(title: str) -> str:
+    r"""C-4: marker 가 박는 LaTeX/markdown/페이지번호 아티팩트 제거 — 절번호 패턴 매칭의 전처리 겸 표시 라벨 정제.
+    실데이터 예: '$\textbf{PG-20.1 …} \hspace{0.2cm} \textbf{(25)}$' → 'PG-20.1 …' / '(25) **A-69**' → 'A-69'.
+    노이즈 없는 제목(한국 법령·일반 ATX 등)엔 inert(무회귀)."""
+    t = re.sub(r'\\textbf|\\textit|\\mathbf|\\hspace\{[^}]*\}|[${}]|\*\*', '', title)
+    t = re.sub(r'^\s*\(\d+\)\s*', '', t)        # 선두 페이지번호 '(25) '
+    return re.sub(r'\s{2,}', ' ', t).strip()
+
+
 def _utf16_units(s: str) -> int:
    """JS 문자열 .length(= UTF-16 code unit 수) 와 동일. astral(BMP 밖)=surrogate pair=2 units.
    FE 의 `raw.length` / `out.slice(off)` 가 UTF-16 code unit 단위라 char_start 도 같은 단위여야 함.
@@ -72,7 +87,9 @@ def _detect_heading(line: str) -> tuple[int, str, str] | None:
    """(level, title, node_type) 또는 None. level 은 상대 깊이."""
    m = _ATX.match(line)
    if m:
-        return (len(m.group(1)), m.group("title").strip(), None)  # node_type 은 후처리에서
+        title = _clean_label(m.group("title").strip())  # C-4: LaTeX/md/페이지번호 정제(전처리)
+        nt = "clause" if _ASME_CLAUSE.match(title) else None  # A-1: ASME 절 식별자(UG-79 등) → clause
+        return (len(m.group(1)), title, nt)
    for pat, lvl, nt in ((_KO_JANG, 1, "chapter"), (_KO_JEOL, 2, "section"),
                          (_KO_JO, 3, "clause"), (_ENG, 1, "chapter")):
        m = pat.match(line)
@@ -102,7 +102,7 @@ async def classify(
        # "MLX primary 호출 경로는 예외 없이 gate 획득 필수".
        async with acquire_mlx_gate(Priority.FOREGROUND):
            async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
-                raw = await client._request(settings.ai.classifier, prompt)
+                raw = await client.call_classifier(prompt)
        _failure_count = 0
    except asyncio.TimeoutError:
        _failure_count += 1
@@ -1,6 +1,6 @@
 """Time-aware retrieval freshness decay (PR-RAG-Time-1).

-뉴스(source_channel='news') / 법령 알림(source_channel='law_monitor') 도메인은
+뉴스(source_channel='news') / 재해사례(material_type='incident', KOSHA) 도메인은
 시간이 중요한 문서. 단순 relevance score 만으로는 오래된 문서가 상위에 머물러
 검색 품질이 떨어짐. 본 모듈은 reranker 이후 final score 합성 단계에서
 soft multiplier 로 시간 가중치 적용. 삭제는 없음 — ranking 만 demote.
@@ -9,9 +9,10 @@ soft multiplier 로 시간 가중치 적용. 삭제는 없음 — ranking 만 de
 - reranker = 의미 관련도, freshness decay = 운영 정책. 두 단계 분리 유지.
 - floor 0.7 (multiplier 가 0.7 미만으로 안 떨어짐) — 오래되어도 죽지 않음.
 - 일반 업로드 / 학습 자료 / KGS Code 원문 / ai_drafted 는 비적용 (no-op).
+- ★법령(law)은 C-1 후속에서 freshness 제외 — 현행성은 version_status(B-1 버전체인)가 처리.

 published_date 컬럼이 documents 에 없음 → created_at(수집 시점) 을 임시 proxy.
-news/law_monitor 워커가 수집 즉시 indexing 하므로 created_at ≈ published_date.
+news/KOSHA 워커가 수집 즉시 indexing 하므로 created_at ≈ published_date.
 정확도 향상은 후속 PR (worker 가 published_date 메타 채우기) 로 분리.
 """

@@ -32,10 +33,10 @@ if TYPE_CHECKING:
 # ─── Policy ────────────────────────────────────────────────────────

 # half-life (일). 90 일: 한 달 ~0.79 / 6개월 ~0.25.
-# 365 일: 1년 ~0.5 / 3년 ~0.13.
+# C-1 후속(2026-06-13): law_365d 폐기 — 법령 현행성은 version_status(B-1 버전체인)가 처리,
+#   age-decay 는 current 법령을 부당 강등(의도 변경 기록). 재해사례(incident)는 news_90d 흡수.
 HALF_LIFE_DAYS: dict[str, int] = {
    "news_90d": 90,
-    "law_365d": 365,
 }

 # soft multiplier — final = base * (FLOOR + (1-FLOOR) * decay).
@@ -52,32 +53,35 @@ class _DocMeta:
    source_channel: str | None
    content_origin: str | None
    created_at: datetime | None
+    material_type: str | None = None


 def freshness_policy(meta: _DocMeta | None) -> str | None:
    """문서 메타 → freshness 정책 이름 또는 None (no-op).

    적용:
-      - source_channel='news'        → news_90d
-      - source_channel='law_monitor' → law_365d
+      - material_type='incident' (KOSHA 재해사례/사망사고) → news_90d (C-1 후속 흡수, 시간 민감)
+      - source_channel='news'                            → news_90d

    비적용 (None 반환):
      - meta 자체가 None
      - content_origin='ai_drafted' (생성 시점 = 가치 시점, 시간 demote 부적합)
-      - 그 외 모든 source_channel (manual, drive_sync, inbox_route, memo,
-        Study/Manual/Reference/Academic/Checklist 류 — 자연 비적용)
+      - ★법령(source_channel='law_monitor'/material_type='law'): C-1 후속에서 law_365d 폐기.
+        법령 현행성은 version_status(B-1 버전체인 current/superseded)가 처리 — age-decay 는
+        current 법령을 부당 강등(의도 변경 기록). law 검색 ranking = version_status decorate.
+      - 그 외 모든 source_channel (manual, drive_sync, inbox_route, memo 등 — 자연 비적용)
    """
    if meta is None:
        return None
    # 가드 2: content_origin='ai_drafted' 비적용
    if meta.content_origin == "ai_drafted":
        return None
-    sc = meta.source_channel
-    if sc == "news":
+    # 재해사례/사망사고 = 시간 민감 → news 와 동일 90d (source 무관, 업로드 incident 도 포함)
+    if meta.material_type == "incident":
        return "news_90d"
-    if sc == "law_monitor":
-        return "law_365d"
-    # 가드 6: unknown source_channel → no decay
+    if meta.source_channel == "news":
+        return "news_90d"
+    # 법령 law_365d 폐기 + unknown source_channel → no decay
    return None


@@ -129,7 +133,7 @@ async def _fetch_meta(
        text(
            """
            SELECT id, source_channel::text AS source_channel,
-                   content_origin, created_at
+                   content_origin, material_type, created_at
            FROM documents
            WHERE id = ANY(:ids)
            """
@@ -141,6 +145,7 @@ async def _fetch_meta(
            source_channel=row.source_channel,
            content_origin=row.content_origin,
            created_at=row.created_at,
+            material_type=getattr(row, "material_type", None),
        )
        for row in rows
    }
@@ -11,7 +11,7 @@
 ## 핵심 원칙
 - **Verifier strong 단독 refuse 금지** — grounding strong 과 교차해야 refuse
 - **Timeout 3s** — 느리면 없는 게 낫다 (fail open)
- MLX gate 미사용 (PR #20 이후 Mac mini 26B endpoint — concurrent 안전성 별 검토)
+- MLX gate 사용 (Mac mini 26B endpoint — classifier/evidence 와 동일 gate 공유, 동시 race 방지)
 """

 from __future__ import annotations
@@ -25,6 +25,7 @@ from typing import TYPE_CHECKING, Literal
 from ai.client import AIClient, _load_prompt, parse_json_response
 from core.config import settings
 from core.utils import setup_logger
+from .llm_gate import Priority, acquire_mlx_gate

 if TYPE_CHECKING:
    from .evidence_service import EvidenceItem
@@ -132,8 +133,9 @@ async def verify(
    prompt = _build_input(answer, evidence)
    client = AIClient()
    try:
-        async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
-            raw = await client._request(settings.ai.verifier, prompt)
+        async with acquire_mlx_gate(Priority.FOREGROUND):
+            async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
+                raw = await client.call_verifier(prompt)
        _failure_count = 0
    except asyncio.TimeoutError:
        _failure_count += 1
@@ -0,0 +1,25 @@
+"""채점(outcome) 산출 단일 소스 (study-to-viewer P2).
+
+라이브 attempt 엔드포인트(submit_attempt)와 뷰어 ingest 가 **동일 함수**로 채점 →
+정오 어휘가 한 곳(서버)에서 결정(plan r2: ingest 는 raw 신호 selected+unsure 만 싣고
+DS 가 산출 = '무수정 재생'을 실제로 성립시키는 형태). correct_choice 는 항상 현재 DB 값.
+
+규칙(라이브 study_questions.py:1008-1020 동일):
+  is_unsure=True  → (None, False, 'unsure')   # unsure 가 정오 override, selected 폐기
+  selected None   → ValueError                # 선택 없고 unsure 도 아니면 무효(엔드포인트가 처리)
+  그 외          → selected==correct → (selected, is_correct, 'correct'|'wrong')
+"""
+
+from __future__ import annotations
+
+
+def derive_outcome(
+    selected_choice: int | None, is_unsure: bool, correct_choice: int
+) -> tuple[int | None, bool, str]:
+    """(selected, is_correct, outcome) 반환. skipped 는 여기서 안 나옴(선택 없으면 호출측이 거부/skip)."""
+    if is_unsure:
+        return None, False, "unsure"
+    if selected_choice is None:
+        raise ValueError("selected_choice (1~4) 또는 is_unsure=true 가 필요합니다")
+    is_correct = selected_choice == correct_choice
+    return selected_choice, is_correct, ("correct" if is_correct else "wrong")
@@ -0,0 +1,174 @@
+"""발행 outbox enqueue + 초기 백필 (docsrv-viewer-publish).
+
+enqueue_publish: 저작/4-A 트랜잭션이 같은 session(=같은 Postgres tx)에서 호출 → caller commit
+  (P0-1 규율: 콘텐츠 변경과 outbox INSERT 원자성, dual-write 회피). payload/hash 스냅샷.
+enqueue_question_publish: 문항 + (ready면)해설을 함께 적재. 저작 쓰기/4-A 완료/백필 공용.
+backfill_publish_questions: 기존 active 문항을 bounded 로 1회 outbox 적재(초기 백필, P2-1 bounded page).
+  멱등 = 발행 워커의 (payload_hash, deleted) 디둡이 no-op 재투영 흡수(중복 enqueue 무해).
+
+★주의: 저작 엔드포인트(study_questions create/update)·4-A 워커에서의 enqueue 결선은 P0-1b
+  (기존 hot 파일 수정이라 별 increment). 본 모듈은 호출 라이브러리 + 수동/백필 진입점.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from models.published import PublishOutbox
+from models.study_memo_card import StudyMemoCard
+from models.study_memo_card_progress import StudyMemoCardProgress
+from models.study_question import StudyQuestion
+from models.study_topic import StudyTopic
+from services.study.publish_projection import (
+    KIND_CARD,
+    KIND_CARD_PROGRESS,
+    KIND_EXPLANATION,
+    KIND_QUESTION,
+    KIND_TOPIC,
+    SCHEMA_VERSION,
+    payload_hash,
+    project_card,
+    project_card_progress,
+    project_explanation,
+    project_question,
+    project_topic,
+)
+
+
+async def enqueue_publish(
+    session: AsyncSession,
+    *,
+    kind: str,
+    source_id: int,
+    payload: dict[str, Any] | None,
+    deleted: bool = False,
+) -> None:
+    """outbox 1행 INSERT. caller 가 commit (저자 tx 동봉). deleted=True 면 tombstone(payload={})."""
+    body: dict[str, Any] = payload if payload is not None else {}
+    session.add(
+        PublishOutbox(
+            kind=kind,
+            source_id=source_id,
+            payload=body,
+            payload_hash=payload_hash(body),
+            schema_version=SCHEMA_VERSION,
+            deleted=deleted,
+        )
+    )
+
+
+async def enqueue_question_publish(session: AsyncSession, q: Any) -> None:
+    """문항 + (ready면)해설을 outbox 적재. caller commit."""
+    await enqueue_publish(session, kind=KIND_QUESTION, source_id=q.id, payload=project_question(q))
+    expl = project_explanation(q)
+    if expl is not None:
+        await enqueue_publish(session, kind=KIND_EXPLANATION, source_id=q.id, payload=expl)
+
+
+async def backfill_publish_questions(session: AsyncSession, *, after_id: int = 0, limit: int = 200) -> int:
+    """active(미삭제) 문항을 id>after_id 부터 bounded 로 outbox 적재.
+
+    반환 = enqueue 한 문항 수(0 이면 끝). 큰 셋은 마지막 id 로 페이지 반복. caller commit.
+    """
+    rows = (
+        await session.execute(
+            select(StudyQuestion)
+            .where(StudyQuestion.deleted_at.is_(None), StudyQuestion.id > after_id)
+            .order_by(StudyQuestion.id.asc())
+            .limit(limit)
+        )
+    ).scalars().all()
+    for q in rows:
+        await enqueue_question_publish(session, q)
+    return len(rows)
+
+
+async def enqueue_topic_publish(session: AsyncSession, topic: Any) -> None:
+    """주제 메타를 outbox 적재(S-1). caller commit. 저작 create/update 결선 + 백필 공용."""
+    await enqueue_publish(session, kind=KIND_TOPIC, source_id=topic.id, payload=project_topic(topic))
+
+
+async def backfill_publish_topics(session: AsyncSession, *, after_id: int = 0, limit: int = 200) -> int:
+    """active(미삭제) 주제를 id>after_id 부터 bounded 로 outbox 적재(S-1 초기 백필).
+
+    반환 = enqueue 한 주제 수(0 이면 끝). 큰 셋은 마지막 id 로 페이지 반복. caller commit.
+    멱등 = 발행 워커의 (payload_hash, deleted) 디둡이 no-op 재투영 흡수(중복 enqueue 무해).
+    """
+    rows = (
+        await session.execute(
+            select(StudyTopic)
+            .where(StudyTopic.deleted_at.is_(None), StudyTopic.id > after_id)
+            .order_by(StudyTopic.id.asc())
+            .limit(limit)
+        )
+    ).scalars().all()
+    for t in rows:
+        await enqueue_topic_publish(session, t)
+    return len(rows)
+
+
+async def enqueue_card_publish(session: AsyncSession, card: Any) -> None:
+    """카드 상태 기반 발행/tombstone (S-2). caller commit.
+
+    검수완료(needs_review=False) & 미삭제 만 발행 — 그 외(검수대기 복귀·삭제·retire)는
+    tombstone(feed 1급 삭제 이벤트). 발행 자격이 카드 상태에 매여 있어 호출측은 '카드를
+    건드렸다'만 알면 되고 publish/tombstone 분기는 여기 단일화(경로별 가드 기억 회피).
+    """
+    if card.deleted_at is not None or card.needs_review:
+        await enqueue_publish(session, kind=KIND_CARD, source_id=card.id, payload=None, deleted=True)
+    else:
+        await enqueue_publish(session, kind=KIND_CARD, source_id=card.id, payload=project_card(card))
+
+
+async def backfill_publish_cards(session: AsyncSession, *, after_id: int = 0, limit: int = 200) -> int:
+    """검수완료(needs_review=False)·미삭제 카드를 id>after_id 부터 bounded 로 outbox 적재(S-2 초기 백필).
+
+    반환 = enqueue 한 카드 수(0 이면 끝). 멱등 = 워커 (payload_hash, deleted) 디둡. caller commit.
+    """
+    rows = (
+        await session.execute(
+            select(StudyMemoCard)
+            .where(
+                StudyMemoCard.deleted_at.is_(None),
+                StudyMemoCard.needs_review.is_(False),
+                StudyMemoCard.id > after_id,
+            )
+            .order_by(StudyMemoCard.id.asc())
+            .limit(limit)
+        )
+    ).scalars().all()
+    for c in rows:
+        await enqueue_card_publish(session, c)
+    return len(rows)
+
+
+async def enqueue_card_progress_publish(session: AsyncSession, progress: Any) -> None:
+    """카드 SR progress row 발행(S-4). caller commit. rate_card 결과(ALL row, sentinel/terminal 포함)."""
+    await enqueue_publish(
+        session,
+        kind=KIND_CARD_PROGRESS,
+        source_id=progress.id,
+        payload=project_card_progress(progress),
+    )
+
+
+async def backfill_publish_card_progress(session: AsyncSession, *, after_id: int = 0, limit: int = 200) -> int:
+    """모든 card progress row 를 id>after_id 부터 bounded 로 outbox 적재(S-4 초기 백필).
+
+    ★필터 없음 = ALL row(due_at NULL sentinel·terminal 포함) — due-only 백필은 sentinel 누락.
+    반환 = enqueue 한 row 수(0 이면 끝). 멱등 = 워커 디둡. caller commit.
+    """
+    rows = (
+        await session.execute(
+            select(StudyMemoCardProgress)
+            .where(StudyMemoCardProgress.id > after_id)
+            .order_by(StudyMemoCardProgress.id.asc())
+            .limit(limit)
+        )
+    ).scalars().all()
+    for p in rows:
+        await enqueue_card_progress_publish(session, p)
+    return len(rows)
@@ -0,0 +1,112 @@
+"""발행 projection — 소스 행을 render-ready payload + 안정 해시로 변환 (순수 함수).
+
+뷰어가 보는 '단일 진실'은 이 payload 까지 (DS 내부 실험 스키마는 계약 뒤 격리).
+kind 별 projector. payload_hash = 정렬된 JSON 의 sha256 = (payload_hash, deleted) 디둡 키.
+
+★주의(plan study-to-viewer-slice1 r2): 과목/시험메타를 per-question payload 에 인라인 —
+  bulk subject rename 시 N행 churn. 정규화(과목=별 kind subject ref)는 churn 최적화 후속(P0-1b),
+  읽기 정합엔 무영향. 지금은 인라인(상관관계 단순)으로 두고 후속 PR 에서 분리.
+SCHEMA_VERSION = 엔벨로프 버전. payload 모양 진화 시 bump + 뷰어 range 수용(P0-2).
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+from typing import Any
+
+SCHEMA_VERSION = 1
+
+KIND_QUESTION = "study_question"
+KIND_EXPLANATION = "study_explanation"
+KIND_TOPIC = "study_topic"
+KIND_CARD = "study_card"  # ★뷰어 pubstudy.ts 의 KIND_CARD 와 일치 필수(S-3 forward-contract).
+KIND_CARD_PROGRESS = "study_card_progress"  # 카드 SR 상태 read model (S-4, viewer C-4 소비).
+
+
+def payload_hash(payload: dict[str, Any]) -> str:
+    """정렬 JSON 의 sha256 — (payload_hash, deleted) 디둡 키. 키 순서/공백 비의존."""
+    canonical = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
+
+
+def project_question(q: Any) -> dict[str, Any]:
+    """study_question → 발행 payload. 정답 포함(개인 학습툴, plan Q2). 이미지는 ref 만(P0-4, 후속)."""
+    return {
+        "topic_id": q.study_topic_id,
+        "question_text": q.question_text,
+        "choices": [q.choice_1, q.choice_2, q.choice_3, q.choice_4],
+        "correct_choice": q.correct_choice,
+        "subject": q.subject,
+        "scope": q.scope,
+        "exam_name": q.exam_name,
+        "exam_round": q.exam_round,
+        "exam_question_number": q.exam_question_number,
+        "explanation": q.explanation,  # 수동 해설(있으면). AI 해설은 별 kind.
+    }
+
+
+def project_explanation(q: Any) -> dict[str, Any] | None:
+    """study_question 의 AI 해설 → 별 발행 kind. ready 일 때만(없으면 None=발행 안 함).
+
+    재조우 표시용 선발행. 신규 오답은 4-A 워커가 ~90s 후 ready→재발행(P2-3 결선, P0-1b).
+    """
+    if getattr(q, "ai_explanation_status", None) != "ready" or not getattr(q, "ai_explanation", None):
+        return None
+    gen = getattr(q, "ai_explanation_generated_at", None)
+    return {
+        "question_source_id": q.id,
+        "explanation_md": q.ai_explanation,
+        "model": getattr(q, "ai_explanation_model", None),
+        "generated_at": gen.isoformat() if gen else None,
+    }
+
+
+def project_card(c: Any) -> dict[str, Any]:
+    """study_memo_card → 발행 payload (S-2). 순수 변환 — 발행 자격(needs_review=false &
+    미삭제) 판단은 호출측(enqueue_card_publish)이 카드 상태로. payload 계약 = 뷰어
+    pubstudy.ts getCards 와 동형(format·cue·fact·cloze_text·source_question_id·source_generated_at).
+    """
+    gen = getattr(c, "source_generated_at", None)
+    return {
+        "format": c.format,
+        "cue": c.cue,
+        "fact": c.fact,
+        "cloze_text": c.cloze_text,
+        "source_question_id": c.source_question_id,
+        "source_generated_at": gen.isoformat() if gen else None,
+    }
+
+
+def project_card_progress(p: Any) -> dict[str, Any]:
+    """study_memo_card_progress → 발행 payload (S-4) = 카드 SR 상태 read model.
+
+    ★ALL row 발행(due_at NULL sentinel=암-on-new · terminal=졸업 포함). due-only 발행하면
+    sentinel 누락 → viewer 가 '미확인' 오분류. SR 계산은 DS(sr_schedule), 여긴 결과만.
+    card_id = pub_card 의 source_id(=DS card.id) → viewer C-4 가 pub_card LEFT JOIN 하는 키.
+    """
+    due = getattr(p, "due_at", None)
+    rev = getattr(p, "last_reviewed_at", None)
+    return {
+        "card_id": p.card_id,
+        "topic_id": p.study_topic_id,
+        "last_outcome": p.last_outcome,
+        "last_reviewed_at": rev.isoformat() if rev else None,
+        "due_at": due.isoformat() if due else None,
+        "review_stage": p.review_stage,
+    }
+
+
+def project_topic(t: Any) -> dict[str, Any]:
+    """study_topic → 발행 payload (S-1, plan study-viewer-port).
+
+    topic 메타만 신규 발행 — viewer 가 주제 단위 퀴즈를 만들 최소 정보.
+    회차 목록은 발행 안 함 = viewer 가 pub_content(study_question) 의 exam_name/exam_round 로
+    파생(추가 발행 불요, plan S-1 결정). topic_id 는 project_question 의 topic_id(=study_topic_id)
+    와 동일 DS 식별자라 viewer 가 문항→주제 상관에 사용(pub_id 는 opaque 라 상관 키 아님).
+    """
+    return {
+        "topic_id": t.id,
+        "name": t.name,
+        "exam_round_size": t.exam_round_size,
+    }
@@ -40,6 +40,7 @@ from ai.client import (
 )
 from ai.envelope import EscalationEnvelope
 from core.config import settings
+from services.search.llm_gate import Priority, acquire_mlx_gate
 from core.utils import setup_logger
 from models.document import Document
 from models.queue import StageDeferred, enqueue_stage
@@ -673,7 +674,10 @@ async def _run_tier_triage(
            # 는 아래 generic except 에 먹히지 않게 먼저 전파.
            raw_triage = await call_deep_or_defer(client, prompt, cfg=deep_triage_cfg)
        else:
-            raw_triage = await client.call_triage(prompt)
+            # consumer 경로 call_triage 는 PR #20 이후 primary 와 동일 Mac mini endpoint —
+            # evidence/classifier 처럼 gate 안에서 호출(영구 룰: 같은 endpoint 예외 없이 gate).
+            async with acquire_mlx_gate(Priority.BACKGROUND):
+                raw_triage = await client.call_triage(prompt)
    except StageDeferred:
        raise  # drain 이 attempts 미소모 + 백오프로 처리 (sleep-안전)
    except Exception as exc:
@@ -374,11 +374,17 @@ async def run(bulk: bool = False, limit: int = 0) -> None:

    totals = {"page": 0, "pdf": 0, "skip": 0}
    for i, (url, lastmod) in enumerate(todo, 1):
-        async with async_session() as session:
-            src = await session.get(NewsSource, source_id)
-            counts = await _ingest_url(session, src, url, lastmod)
-            _set_watermark(src, lastmod)
-            await session.commit()
+        # 2026-06-20 C2: URL 1건 실패가 주간 run 전체를 중단(이후 URL 스킵·watermark 정지)하던 것 차단.
+        # 각 iteration 은 자체 session(async with) 이라 실패 격리 — 건너뛰고 계속.
+        try:
+            async with async_session() as session:
+                src = await session.get(NewsSource, source_id)
+                counts = await _ingest_url(session, src, url, lastmod)
+                _set_watermark(src, lastmod)
+                await session.commit()
+        except Exception as e:
+            logger.error(f"[csb] URL 처리 실패 (건너뜀): {url} — {str(e) or repr(e)}")
+            continue
        for k in totals:
            totals[k] += counts[k]
        if i % 10 == 0:
@@ -67,21 +67,45 @@ def _postprocess_ocr(text: str) -> str:
    return text.strip()


-def _extract_pdf_pymupdf(file_path: Path) -> str:
-    """PyMuPDF fallback — 페이지 단위 스트리밍으로 대형 PDF도 저메모리 처리"""
+def _extract_pdf_pymupdf(
+    file_path: Path, start_page: int | None = None, end_page: int | None = None
+) -> str:
+    """PyMuPDF fallback — 페이지 단위 스트리밍으로 대형 PDF도 저메모리 처리.
+
+    G2 (PR-G2-2): start_page/end_page(1-based inclusive) 가 주어지면 그 범위만 추출
+    (번들 자식 doc = 부모 파일 공유 + 자기 page 범위). 둘 다 None = 전체(기존 동작 동일).
+    """
    import fitz
    text_parts = []
    with fitz.open(str(file_path)) as doc:
-        for page in doc:
-            text_parts.append(page.get_text())
+        if start_page is None and end_page is None:
+            for page in doc:
+                text_parts.append(page.get_text())
+        else:
+            # 1-based inclusive → 0-based range. 범위는 [0, page_count] 로 클램프(방어).
+            total = doc.page_count
+            lo = max(1, start_page or 1) - 1
+            hi = min(total, end_page or total)        # inclusive 끝 (0-based 마지막 인덱스 = hi-1)
+            for i in range(lo, hi):
+                text_parts.append(doc.load_page(i).get_text())
    return "\n".join(text_parts)


-def _get_pdf_page_count(file_path: Path) -> int:
-    """PDF 페이지 수 확인"""
+def _get_pdf_page_count(
+    file_path: Path, start_page: int | None = None, end_page: int | None = None
+) -> int:
+    """PDF 페이지 수 확인. G2: 범위가 주어지면 그 범위의 페이지 수(자식 doc 밀도 계산용).
+
+    둘 다 None = 전체 페이지 수(기존 동작 동일).
+    """
    import fitz
    with fitz.open(str(file_path)) as doc:
-        return len(doc)
+        total = len(doc)
+        if start_page is None and end_page is None:
+            return total
+        lo = max(1, start_page or 1)
+        hi = min(total, end_page or total)
+        return max(0, hi - lo + 1)


 async def _call_ocr(file_path: Path, is_image: bool, max_pages: int = 200) -> str | None:
@@ -310,6 +334,49 @@ async def process(document_id: int, session: AsyncSession) -> None:
        doc.extracted_at = datetime.now(timezone.utc)
        return

+    # ─── G2 (PR-G2-2): 번들 자식 PDF — 부모 파일 공유 + 자기 page 범위만 추출 ───
+    # kordoc 서비스는 page-range 파라미터가 없어 전체 파일을 파싱한다(자식엔 부적합) → kordoc
+    # 우회, PyMuPDF 로 [bundle_page_start, bundle_page_end] 범위만 추출. range OCR 은 본 PR 범위
+    # 밖(자식은 ToC 존재 = digital text layer 전제 → 대개 OCR 불필요). PyMuPDF 텍스트가 빈약해도
+    # 그대로 보존하고 사유를 남긴다.
+    if fmt == "pdf" and doc.bundle_page_start is not None and doc.bundle_page_end is not None:
+        # 후보 A: 자식 file_path 는 합성값(`{부모}#p{s}-{e}`) → 실파일 = bundle_source_path 로 부모경로
+        # 복원 + NFC/NFD resolve. (자식 file_path 는 디스크에 없음.)
+        from workers.presegment_worker import _resolve_path as _resolve_bundle_path
+        from workers.presegment_worker import bundle_source_path
+        real_rel = bundle_source_path(doc.file_path)
+        src = _resolve_bundle_path(str(Path(settings.nas_mount_path) / real_rel))
+        if src is None:
+            raise FileNotFoundError(f"번들 원본 파일 없음: {real_rel}")
+        start, end = doc.bundle_page_start, doc.bundle_page_end
+        try:
+            pymupdf_text = _extract_pdf_pymupdf(src, start, end)
+            page_count = _get_pdf_page_count(src, start, end)
+        except Exception as e:
+            logger.error(f"[pymupdf:child] {doc.file_path} pages={start}-{end} 실패: {e}")
+            raise
+
+        meta = doc.extract_meta or {}
+        meta["presegment_child_range"] = {"start_page": start, "end_page": end}
+        meta["pymupdf_chars"] = len(pymupdf_text.strip())
+        should, reason = _should_ocr(pymupdf_text, page_count)
+        if should:
+            # range OCR 미지원(후속 PR) — PyMuPDF 결과 유지 + 사유 기록(silent skip 아님).
+            meta["ocr_skip_reason"] = "presegment_child_range_ocr_unsupported"
+            meta["ocr_reason"] = reason
+            logger.warning(
+                f"[pymupdf:child] {doc.file_path} pages={start}-{end} "
+                f"OCR 필요({reason})하나 range OCR 미지원 → PyMuPDF 결과 유지"
+            )
+        doc.extracted_text = pymupdf_text.replace("\x00", "")
+        doc.extracted_at = datetime.now(timezone.utc)
+        doc.extractor_version = PYMUPDF_VERSION if pymupdf_text.strip() else None
+        doc.extract_meta = meta
+        logger.info(
+            f"[pymupdf:child] {doc.file_path} pages={start}-{end} ({len(pymupdf_text)}자)"
+        )
+        return
+
    # ─── kordoc 파싱 (HWP/HWPX/PDF) + PyMuPDF fallback + OCR ───
    if fmt in KORDOC_FORMATS:
        container_path = f"/documents/{doc.file_path}"
@@ -118,16 +118,18 @@ def _route_media(path: Path, expected_category: str | None) -> tuple[str | None,
    if expected_category == "library":
        # 외부 작성 학습 자료 (KGS Code, 시행규칙 등). 문서 확장자만 수락.
        # frontmatter 해석은 classify_worker (옵션 C) 가 담당. file_watcher 는 라우팅만.
+        # G2: 첫 stage=presegment (후보 A 검증완료). 非PDF/단일 통과, 번들 PDF 만 분할.
        if ext in LIBRARY_DOC_EXTS:
-            return ("library", False, "extract")
+            return ("library", False, "presegment")
        if ext in AUDIO_EXTS or ext in VIDEO_DIRECT_EXTS or ext in VIDEO_QUARANTINE_EXTS:
            return (None, False, None)  # audio/video 잘못 들어오면 skip
        return (None, False, None)  # 기타 알 수 없는 확장자 skip

    # Inbox: 문서 파이프 (기존). audio/video 확장자가 실수로 여기 들어오면 skip.
+    # G2: 첫 stage=presegment (후보 A 검증완료). 非PDF/단일 통과, 번들 PDF 만 분할.
    if ext in AUDIO_EXTS or ext in VIDEO_DIRECT_EXTS or ext in VIDEO_QUARANTINE_EXTS:
        return (None, False, None)
-    return (None, False, "extract")
+    return (None, False, "presegment")


 # ─── Web/Blog ingest (devonagent 트랙) 헬퍼 ──────────────────────────────────
@@ -226,7 +228,8 @@ async def _ingest_web_file(session, file_path: Path, rel_path: str) -> tuple[int
    )
    session.add(doc)
    await session.flush()
-    await enqueue_stage(session, doc.id, "extract")
+    # G2: 첫 stage=presegment (후보 A 검증완료). HTML(非PDF)은 presegment 가 무변 통과 → extract.
+    await enqueue_stage(session, doc.id, "presegment")
    return (1, 0)


@@ -1,367 +0,0 @@
-"""법령 모니터 워커 — 국가법령정보센터 API 연동
-
-26개 법령 모니터링, 편/장 단위 분할 저장, 변경 이력 추적.
-매일 07:00 실행 (APScheduler).
-"""
-
-import os
-import re
-from datetime import date, datetime, timezone
-from pathlib import Path
-from xml.etree import ElementTree as ET
-
-import httpx
-from sqlalchemy import select
-
-from core.config import settings
-from core.database import async_session
-from core.utils import create_caldav_todo, file_hash, setup_logger
-from models.automation import AutomationState
-from models.document import Document
-from models.queue import enqueue_stage
-
-logger = setup_logger("law_monitor")
-
-LAW_SEARCH_URL = "https://www.law.go.kr/DRF/lawSearch.do"
-LAW_SERVICE_URL = "https://www.law.go.kr/DRF/lawService.do"
-
-# 모니터링 대상 법령 (26개)
-MONITORED_LAWS = [
-    # 산업안전보건 핵심
-    "산업안전보건법",
-    "산업안전보건법 시행령",
-    "산업안전보건법 시행규칙",
-    "산업안전보건기준에 관한 규칙",
-    "유해위험작업의 취업 제한에 관한 규칙",
-    "중대재해 처벌 등에 관한 법률",
-    "중대재해 처벌 등에 관한 법률 시행령",
-    # 건설안전
-    "건설기술 진흥법",
-    "건설기술 진흥법 시행령",
-    "건설기술 진흥법 시행규칙",
-    "시설물의 안전 및 유지관리에 관한 특별법",
-    # 위험물/화학
-    "위험물안전관리법",
-    "위험물안전관리법 시행령",
-    "위험물안전관리법 시행규칙",
-    "화학물질관리법",
-    "화학물질관리법 시행령",
-    "화학물질의 등록 및 평가 등에 관한 법률",
-    # 소방/전기/가스
-    "소방시설 설치 및 관리에 관한 법률",
-    "소방시설 설치 및 관리에 관한 법률 시행령",
-    "전기사업법",
-    "전기안전관리법",
-    "고압가스 안전관리법",
-    "고압가스 안전관리법 시행령",
-    "액화석유가스의 안전관리 및 사업법",
-    # 근로/환경
-    "근로기준법",
-    "환경영향평가법",
-]
-
-
-async def run():
-    """법령 변경 모니터링 실행"""
-    law_oc = os.getenv("LAW_OC", "")
-    if not law_oc:
-        logger.warning("LAW_OC 미설정 — 법령 API 승인 대기 중")
-        return
-
-    async with async_session() as session:
-        state = await session.execute(
-            select(AutomationState).where(AutomationState.job_name == "law_monitor")
-        )
-        state_row = state.scalar_one_or_none()
-        last_check = state_row.last_check_value if state_row else None
-
-        today = datetime.now(timezone.utc).strftime("%Y%m%d")
-        if last_check == today:
-            logger.info("오늘 이미 체크 완료")
-            return
-
-        new_count = 0
-        async with httpx.AsyncClient(timeout=30) as client:
-            for law_name in MONITORED_LAWS:
-                try:
-                    count = await _check_law(client, law_oc, law_name, session)
-                    new_count += count
-                except Exception as e:
-                    logger.error(f"[{law_name}] 체크 실패: {e}")
-
-        # 상태 업데이트
-        if state_row:
-            state_row.last_check_value = today
-            state_row.last_run_at = datetime.now(timezone.utc)
-        else:
-            session.add(AutomationState(
-                job_name="law_monitor",
-                last_check_value=today,
-                last_run_at=datetime.now(timezone.utc),
-            ))
-
-        await session.commit()
-        logger.info(f"법령 모니터 완료: {new_count}건 신규/변경 감지")
-
-
-async def _check_law(
-    client: httpx.AsyncClient,
-    law_oc: str,
-    law_name: str,
-    session,
-) -> int:
-    """단일 법령 검색 → 변경 감지 → 분할 저장"""
-    # 법령 검색 (lawSearch.do)
-    resp = await client.get(
-        LAW_SEARCH_URL,
-        params={"OC": law_oc, "target": "law", "type": "XML", "query": law_name},
-    )
-    resp.raise_for_status()
-
-    root = ET.fromstring(resp.text)
-    total = root.findtext(".//totalCnt", "0")
-    if total == "0":
-        logger.debug(f"[{law_name}] 검색 결과 없음")
-        return 0
-
-    # 정확히 일치하는 법령 찾기
-    for law_elem in root.findall(".//law"):
-        found_name = law_elem.findtext("법령명한글", "").strip()
-        if found_name != law_name:
-            continue
-
-        mst = law_elem.findtext("법령일련번호", "")
-        proclamation_date = law_elem.findtext("공포일자", "")
-        revision_type = law_elem.findtext("제개정구분명", "")
-
-        if not mst:
-            continue
-
-        # 이미 등록된 법령인지 확인 (같은 법령명 + 공포일자)
-        existing = await session.execute(
-            select(Document).where(
-                Document.title.like(f"{law_name}%"),
-                Document.source_channel == "law_monitor",
-            )
-        )
-        existing_docs = existing.scalars().all()
-
-        # 같은 공포일자 이미 있으면 skip
-        for doc in existing_docs:
-            if proclamation_date in (doc.title or ""):
-                return 0
-
-        # 이전 공포일 찾기 (변경 이력용)
-        prev_date = ""
-        if existing_docs:
-            prev_date = max(
-                (re.search(r'\d{8}', doc.title or "").group() for doc in existing_docs
-                 if re.search(r'\d{8}', doc.title or "")),
-                default=""
-            )
-
-        # 본문 조회 (lawService.do)
-        text_resp = await client.get(
-            LAW_SERVICE_URL,
-            params={"OC": law_oc, "target": "law", "MST": mst, "type": "XML"},
-        )
-        text_resp.raise_for_status()
-
-        # 분할 저장
-        count = await _save_law_split(
-            session, text_resp.text, law_name, proclamation_date,
-            revision_type, prev_date,
-        )
-
-        # DB 먼저 커밋 (알림 실패가 저장을 막지 않도록)
-        await session.commit()
-
-        # CalDAV + SMTP 알림 (실패해도 무시)
-        try:
-            _send_notifications(law_name, proclamation_date, revision_type)
-        except Exception as e:
-            logger.warning(f"[{law_name}] 알림 발송 실패 (무시): {e}")
-
-        return count
-
-    return 0
-
-
-async def _save_law_split(
-    session, xml_text: str, law_name: str, proclamation_date: str,
-    revision_type: str, prev_date: str,
-) -> int:
-    """법령 XML → 장(章) 단위 Markdown 분할 저장"""
-    root = ET.fromstring(xml_text)
-
-    # 조문단위에서 장 구분자 찾기 (조문키가 000으로 끝나는 조문)
-    units = root.findall(".//조문단위")
-    chapters = []  # [(장제목, [조문들])]
-    current_chapter = None
-    current_articles = []
-
-    for unit in units:
-        key = unit.attrib.get("조문키", "")
-        content = (unit.findtext("조문내용", "") or "").strip()
-
-        # 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함
-        if key.endswith("000") and re.search(r"제\d+장", content):
-            # 이전 장/서문 저장
-            if current_articles:
-                chapter_name = current_chapter or "서문"
-                chapters.append((chapter_name, current_articles))
-            chapter_match = re.search(r"(제\d+장\s*.+)", content)
-            current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip()
-            current_articles = []
-        else:
-            current_articles.append(unit)
-
-    # 마지막 장 저장
-    if current_articles:
-        chapter_name = current_chapter or "서문"
-        chapters.append((chapter_name, current_articles))
-
-    # 장 분할 성공
-    sections = []
-    if chapters:
-        for chapter_title, articles in chapters:
-            md_lines = [f"# {law_name}\n", f"## {chapter_title}\n"]
-            for article in articles:
-                title = article.findtext("조문제목", "")
-                content = article.findtext("조문내용", "")
-                if title:
-                    md_lines.append(f"\n### {title}\n")
-                if content:
-                    md_lines.append(content.strip())
-            section_name = _safe_name(chapter_title)
-            sections.append((section_name, "\n".join(md_lines)))
-    else:
-        # 장 분할 실패 → 전체 1파일
-        full_md = _law_xml_to_markdown(xml_text, law_name)
-        sections.append(("전문", full_md))
-
-    # 각 섹션 저장
-    inbox_dir = Path(settings.nas_mount_path) / "PKM" / "Inbox"
-    inbox_dir.mkdir(parents=True, exist_ok=True)
-    count = 0
-
-    for section_name, content in sections:
-        filename = f"{law_name}_{proclamation_date}_{section_name}.md"
-        file_path = inbox_dir / filename
-        file_path.write_text(content, encoding="utf-8")
-
-        rel_path = str(file_path.relative_to(Path(settings.nas_mount_path)))
-
-        # 변경 이력 메모
-        note = ""
-        if prev_date:
-            note = (
-                f"[자동] 법령 개정 감지\n"
-                f"이전 공포일: {prev_date}\n"
-                f"현재 공포일: {proclamation_date}\n"
-                f"개정구분: {revision_type}"
-            )
-
-        # 안전 자료실 A-2 — 공포일 파싱 (law published_date = COALESCE(시행일, 공포일) 계약,
-        # 본 레거시 워커는 공포일만 보유 — 시행일 기반 버전 체인은 B-1 statute_collector 소관)
-        _digits = re.sub(r"\D", "", str(proclamation_date or ""))
-        pub_date = None
-        if len(_digits) == 8:
-            try:
-                pub_date = date(int(_digits[:4]), int(_digits[4:6]), int(_digits[6:8]))
-            except ValueError:
-                pub_date = None
-
-        doc = Document(
-            file_path=rel_path,
-            file_hash=file_hash(file_path),
-            file_format="md",
-            file_size=len(content.encode()),
-            file_type="immutable",
-            title=f"{law_name} ({proclamation_date}) {section_name}",
-            source_channel="law_monitor",
-            data_origin="work",
-            category="law",
-            # 안전 자료실 A-2 — ingest 시점 deterministic. 법령 텍스트 = 저작권법 제7조
-            # 비보호 저작물 (public domain). 본 워커는 휴면(LAW_OC 미설정)이나 코드 경로 유지.
-            material_type="law",
-            jurisdiction="KR",
-            published_date=pub_date,
-            extract_meta={"license": {"scheme": "public_domain", "redistribute": True,
-                                      "attribution": "국가법령정보센터"}},
-            user_note=note or None,
-        )
-        session.add(doc)
-        await session.flush()
-
-        await enqueue_stage(session, doc.id, "extract")
-        count += 1
-
-    logger.info(f"[법령] {law_name} ({proclamation_date}) → {count}개 섹션 저장")
-    return count
-
-
-def _xml_section_to_markdown(elem) -> str:
-    """XML 섹션(편/장)을 Markdown으로 변환"""
-    lines = []
-    for article in elem.iter():
-        tag = article.tag
-        text = (article.text or "").strip()
-        if not text:
-            continue
-        if "조" in tag:
-            lines.append(f"\n### {text}\n")
-        elif "항" in tag:
-            lines.append(f"\n{text}\n")
-        elif "호" in tag:
-            lines.append(f"- {text}")
-        elif "목" in tag:
-            lines.append(f"  - {text}")
-        else:
-            lines.append(text)
-    return "\n".join(lines)
-
-
-def _law_xml_to_markdown(xml_text: str, law_name: str) -> str:
-    """법령 XML 전체를 Markdown으로 변환"""
-    root = ET.fromstring(xml_text)
-    lines = [f"# {law_name}\n"]
-
-    for elem in root.iter():
-        tag = elem.tag
-        text = (elem.text or "").strip()
-        if not text:
-            continue
-        if "편" in tag and "제목" not in tag:
-            lines.append(f"\n## {text}\n")
-        elif "장" in tag and "제목" not in tag:
-            lines.append(f"\n## {text}\n")
-        elif "조" in tag:
-            lines.append(f"\n### {text}\n")
-        elif "항" in tag:
-            lines.append(f"\n{text}\n")
-        elif "호" in tag:
-            lines.append(f"- {text}")
-        elif "목" in tag:
-            lines.append(f"  - {text}")
-
-    return "\n".join(lines)
-
-
-def _safe_name(name: str) -> str:
-    """파일명 안전 변환"""
-    return re.sub(r'[^\w가-힣-]', '_', name).strip("_")
-
-
-def _send_notifications(law_name: str, proclamation_date: str, revision_type: str):
-    """CalDAV 할일 알림 (SMTP 발송은 2026-06-10 폐기 — CalDAV 가 단일 알림 채널)"""
-    caldav_url = os.getenv("CALDAV_URL", "")
-    caldav_user = os.getenv("CALDAV_USER", "")
-    caldav_pass = os.getenv("CALDAV_PASS", "")
-    if caldav_url and caldav_user:
-        create_caldav_todo(
-            caldav_url, caldav_user, caldav_pass,
-            title=f"법령 검토: {law_name}",
-            description=f"공포일자: {proclamation_date}, 개정구분: {revision_type}",
-            due_days=7,
-        )
@@ -39,7 +39,11 @@ from models.queue import ProcessingQueue

 logger = logging.getLogger(__name__)

-MARKER_ENDPOINT = "http://marker-service:3300/convert"
+# 마크다운 추출 엔드포인트. compose env `MARKER_ENDPOINT`(base URL)에서 읽는다 —
+# 기본=marker(무변), 컷오버=`http://mineru-service:3301` 로 env 플립만으로 전환.
+# marker/mineru 가 동일 /convert 계약(file_path·start/end·md+base64 images)이라 워커 무변.
+_MARKDOWN_BASE = os.getenv("MARKER_ENDPOINT", "http://marker-service:3300").rstrip("/")
+MARKER_ENDPOINT = _MARKDOWN_BASE if _MARKDOWN_BASE.endswith("/convert") else _MARKDOWN_BASE + "/convert"
 MARKER_TIMEOUT = 300                    # 큰 PDF 5 분 한도
 MAX_PAGES = 200                         # 소형 1-shot 경로 /convert max_pages 안전장치

@@ -181,7 +185,10 @@ async def process(document_id: int, session: AsyncSession) -> None:
        await _fail(session, document_id, "no file_path")
        return

-    container_path = _to_marker_path(doc.file_path)
+    # 후보 A: 자식(bundle cols)은 합성 file_path(`{부모}#p{s}-{e}`) → 실파일 = bundle_source_path
+    # 로 부모경로 복원. 일반 doc 은 그대로(접미사 없음). marker/mineru 는 실파일 + page 범위로 변환.
+    from workers.presegment_worker import bundle_source_path
+    container_path = _to_marker_path(bundle_source_path(doc.file_path))
    suffix = Path(container_path).suffix.lower()

    # ---- (3) office/hwp → md (C-2): PDF 외 지원 포맷은 office_md 하이브리드 변환 ----
@@ -203,7 +210,21 @@ async def process(document_id: int, session: AsyncSession) -> None:
        return

    # ---- (4) page_count gauge + 분기 (LargeDoc split) ----
-    page_count = _get_page_count(container_path)
+    # G2 (PR-G2-2): 번들 자식 doc 은 부모 파일 공유 + 자기 page 범위([bundle_page_start, end],
+    # 1-based inclusive)만 변환해야 한다. page_offset = 절대 시작페이지(부모 파일 기준), page_count =
+    # 자식 범위의 페이지 수. cols 가 NULL(일반 doc)이면 page_offset=1 + 전체 page_count = 기존 동작 동일.
+    file_page_count = _get_page_count(container_path)
+    is_child = doc.bundle_page_start is not None and doc.bundle_page_end is not None
+    if is_child:
+        page_offset = doc.bundle_page_start
+        if file_page_count is not None:
+            child_end = min(doc.bundle_page_end, file_page_count)
+            page_count = max(0, child_end - doc.bundle_page_start + 1)
+        else:
+            page_count = doc.bundle_page_end - doc.bundle_page_start + 1
+    else:
+        page_offset = 1
+        page_count = file_page_count

    # >MAX_SPLIT_PAGES = 변환 안전상태(manual_review). silently skip 아님.
    if page_count is not None and page_count > MAX_SPLIT_PAGES:
@@ -222,20 +243,35 @@ async def process(document_id: int, session: AsyncSession) -> None:

    # ---- (6) 변환 분기: 소형 1-shot / 대형(>SPLIT_THRESHOLD) page-range 분할 ----
    if page_count is not None and page_count > SPLIT_THRESHOLD_PAGES:
-        await _process_split(doc, document_id, container_path, page_count, session)
+        await _process_split(doc, document_id, container_path, page_count, session, page_offset)
    else:
-        await _process_single(doc, document_id, container_path, session)
+        await _process_single(doc, document_id, container_path, session, page_count, page_offset)


 async def _process_single(
-    doc: Document, document_id: int, container_path: str, session: AsyncSession
+    doc: Document, document_id: int, container_path: str, session: AsyncSession,
+    page_count: int | None = None, page_offset: int = 1,
 ) -> None:
-    """소형 PDF(≤ SPLIT_THRESHOLD_PAGES) 통째 1-shot 변환 (Phase 1B/1B.5 기존 경로)."""
+    """소형 PDF(≤ SPLIT_THRESHOLD_PAGES) 통째 1-shot 변환 (Phase 1B/1B.5 기존 경로).
+
+    G2 (PR-G2-2): 번들 자식(page_offset>1)은 [page_offset, page_offset+page_count-1] 범위만
+    변환하도록 marker 에 start_page/end_page 를 명시한다. 일반 doc(page_offset=1)은 기존과
+    동일하게 max_pages 만 보낸다(payload byte-identical).
+    """
+    # 일반 doc = 기존 payload 유지. 자식만 절대 page 범위를 명시(부모 파일 기준 1-based inclusive).
+    if page_offset > 1 and page_count is not None:
+        req_json = {
+            "file_path": container_path,
+            "start_page": page_offset,
+            "end_page": page_offset + page_count - 1,
+        }
+    else:
+        req_json = {"file_path": container_path, "max_pages": MAX_PAGES}
    try:
        async with httpx.AsyncClient(timeout=MARKER_TIMEOUT) as client:
            resp = await client.post(
                MARKER_ENDPOINT,
-                json={"file_path": container_path, "max_pages": MAX_PAGES},
+                json=req_json,
            )
            resp.raise_for_status()
            data = resp.json()
@@ -271,6 +307,10 @@ async def _process_single(

    # ---- (7) image persist + md_content rewrite (Phase 1B.5) ----
    md_content_raw = data["md_content"]
+    # 2026-06-20 H1: 빈 추출(스캔/이미지 PDF)을 md_status=success + 빈 md 로 박제 X
+    # (계약: md_status in {success,partial} => md 非공백). office arm 동형 raise → queue 재시도 후 failed.
+    if not md_content_raw.strip():
+        raise ValueError("empty md_content (blank extraction) — success 박제 차단")
    images_resp = data.get("images") if MARKDOWN_IMAGE_PERSIST else None

    saved_images: list[dict[str, Any]] = []
@@ -509,6 +549,7 @@ async def _process_split(
    container_path: str,
    page_count: int,
    session: AsyncSession,
+    page_offset: int = 1,
 ) -> None:
    """대형 PDF page-range 분할 변환.

@@ -519,6 +560,10 @@ async def _process_split(

    invariant: page numbering = 1-based inclusive (batch1: 1..BATCH_PAGES, ...).
    marker slug(`_page_0_*`) 는 batch 마다 재시작 → batch 별 rewrite 후 stitch (충돌 회피).
+
+    G2 (PR-G2-2): page_offset = 부모 파일 기준 절대 시작페이지(번들 자식). marker 에 보내는
+    page 는 절대값(page_offset 가산), manifest/기록은 자식 상대값(1-based) 유지 — 일반 doc
+    (page_offset=1)은 abs==rel 이라 기존 동작과 동일.
    """
    n_batches = (page_count + BATCH_PAGES - 1) // BATCH_PAGES
    succeeded: list[dict[str, Any]] = []   # {start_page, end_page, md}
@@ -530,15 +575,17 @@ async def _process_split(

    async with httpx.AsyncClient(timeout=MARKER_TIMEOUT) as client:
        for b in range(n_batches):
-            start_page = b * BATCH_PAGES + 1
+            start_page = b * BATCH_PAGES + 1          # 자식 상대 1-based (manifest/기록용)
            end_page = min((b + 1) * BATCH_PAGES, page_count)
+            abs_start = start_page + (page_offset - 1)  # 부모 파일 절대 page (marker 요청용)
+            abs_end = end_page + (page_offset - 1)
            try:
                resp = await client.post(
                    MARKER_ENDPOINT,
                    json={
                        "file_path": container_path,
-                        "start_page": start_page,
-                        "end_page": end_page,
+                        "start_page": abs_start,
+                        "end_page": abs_end,
                    },
                )
                resp.raise_for_status()
@@ -610,6 +657,8 @@ async def _process_split(

    md_status = "success" if not failed else "partial"
    stitched = "\n\n".join(b["md"] for b in succeeded)
+    if not stitched.strip():
+        raise ValueError("empty stitched md_content (all batches blank) — success 박제 차단")
    md_content = _build_large_md_content(stitched[:LARGE_DOC_MD_CONTENT_HEAD_CHARS], manifest)

    quality = _compute_quality(stitched, doc.extracted_text or "", {"page_count": page_count})
@@ -213,17 +213,25 @@ async def _run_locked():
        result = await session.execute(
            select(NewsSource).where(NewsSource.enabled == True)
        )
-        sources = result.scalars().all()
+        source_ids = [s.id for s in result.scalars().all()]

-        if not sources:
-            logger.info("활성화된 뉴스 소스 없음")
-            return
+    if not source_ids:
+        logger.info("활성화된 뉴스 소스 없음")
+        return

-        total = 0
-        for source in sources:
-            health = await _get_or_create_health(session, source.id)
+    # 2026-06-20 H3: 소스마다 독립 세션 — 한 소스의 DB 오류가 종단 단일 commit 을 깨뜨려
+    # 전 소스 insert 를 잃던 것 차단. 실패 시 rollback 후 깨끗한 상태에서 failure 기록.
+    # (csb_collector 의 per-iteration 세션 패턴과 동형.)
+    total = 0
+    for sid in source_ids:
+        async with async_session() as session:
+            source = await session.get(NewsSource, sid)
+            if source is None:
+                continue
+            sname = source.name
+            health = await _get_or_create_health(session, sid)
            if not _should_attempt(health, now):
-                logger.info(f"[{source.name}] circuit {health.circuit_state} — 이번 사이클 skip")
+                logger.info(f"[{sname}] circuit {health.circuit_state} — 이번 사이클 skip")
                continue
            try:
                if source.feed_type == "api":
@@ -234,14 +242,18 @@ async def _run_locked():
                source.last_fetched_at = datetime.now(timezone.utc)
                _record_success(health, count, status == "not_modified", now)
                total += count
+                await session.commit()
            except Exception as e:
                # str 이 빈 예외(httpx.ConnectError('')) 대비 — health 기록과 동일 규칙
-                logger.error(f"[{source.name}] 수집 실패: {str(e) or repr(e)}")
-                source.last_fetched_at = datetime.now(timezone.utc)
+                await session.rollback()
+                logger.error(f"[{sname}] 수집 실패: {str(e) or repr(e)}")
+                health = await _get_or_create_health(session, sid)
+                src = await session.get(NewsSource, sid)
+                if src is not None:
+                    src.last_fetched_at = datetime.now(timezone.utc)
                _record_failure(health, str(e) or repr(e), now)
-
-        await session.commit()
-        logger.info(f"뉴스 수집 완료: {total}건 신규")
+                await session.commit()
+    logger.info(f"뉴스 수집 완료: {total}건 신규")


 MAX_RESPONSE_SIZE = 5 * 1024 * 1024  # 5MB
@@ -0,0 +1,562 @@
+"""presegment_worker — extract 前 번들 PDF(여러 논리문서 한 파일) → N 자식 분할 (G2 / PR-G2-2).
+
+전 문서가 presegment stage 로 진입한다(worker-side gating):
+  - 非PDF(file_format != pdf · suffix != .pdf) = 즉시 fast-exit → enqueue_next_stage 가 extract 로 흘림.
+  - PDF = PyMuPDF ToC(level-1) deterministic 분석. '명확한 번들' 만 자식 분할, 나머지는 단일문서로 extract.
+
+deterministic 경로(PR-G2-2): 판정이 애매하면 보수적으로 분할하지 않고 단일문서로 둔다
+(bias to NOT splitting). 분할 = '확실한 번들' 만:
+  - page_count >= MIN_BUNDLE_PAGES AND level-1 ToC 항목 >= 2 AND 모든 자식 >= MIN_CHILD_PAGES
+    AND 단조 증가·비중첩 AND [1, page_count] 전 범위 커버 AND 2 <= N <= MAX_CHILDREN.
+
+LLM 경계 폴백(PR-G2-3, env PRESEGMENT_LLM_FALLBACK, 기본 OFF — scaffold-first): deterministic
+이 '명확한 번들' 을 못 만든 대형 PDF(ToC 없음/level-1 없음/게이트 미달)에 한해, OFF 면 오늘과
+동일(단일문서)이고 ON 이면 off-card Qwen(맥북, 라우터 :8890, model=qwen-macbook)에게 경계를
+제안받는다. compact per-page heading 샘플만 전송(본문 미전송). LLM 출력은 **동일 검증 게이트
+(_is_clear_bundle)** 통과 시에만 deterministic 과 같은 _create_children 경로로 분할 —
+is_bundle=false / 파싱·검증 실패 = 단일문서(오늘과 동일) + presegment_llm_rejected 로깅.
+맥북 불가(503/연결/절단)는 StageDeferred 로 큐 재시도(백오프, no silent fallback).
+
+분할 시 ★후보 A(물리분할 없음, uq_documents_file_path 해소): 자식 file_path = unique 합성값
+`{부모경로}#p{start}-{end}` (UNIQUE 제약 통과), 실파일은 `bundle_source_path()` 로 부모 경로 복원.
+자식은 bundle_page_start/end(1-based inclusive) 로 부모 파일의 자기 page 범위만 가리킨다.
+부모-자식 관계 정본 = document_lineage(relation_type='segmented_from'). 부모(presegment_role='parent')는
+파일 홀더라 자체 extract/embed 안 함 — enqueue_next_stage 의 presegment→extract 전이가 'parent' 면
+억제된다(queue_consumer 참조). 자식의 extract 는 이 워커가 직접 enqueue. extract_worker/marker_worker
+가 자식 처리 시 bundle_source_path() 로 실파일 접근.
+
+멱등: 재실행 시 같은 부모로 이미 자식이 있으면(document_lineage segmented_from) 재생성하지 않고
+수렴(각 자식이 extract 활성/완료 상태인지만 보장)한다.
+
+★해결 이력 (2026-06-18): 최초 Option A(자식이 부모 file_path 그대로 공유)는 uq_documents_file_path
+UNIQUE 위반(실번들 검증서 발견) → 합성 file_path(후보 A)로 해소. 인제스트 재활성 = 합성번들 재검증 PASS 후.
+
+plan: G2 pre-segmentation (PR-G2-2 deterministic ToC segmentation)
+"""
+
+import hashlib
+import os
+import re
+import unicodedata
+from pathlib import Path
+
+from pydantic import BaseModel, ValidationError
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ai.client import AIClient, call_deep_or_defer, parse_json_response
+from core.config import settings
+from core.utils import setup_logger
+from models.document import Document
+from models.document_lineage import DocumentLineage
+from models.queue import enqueue_stage
+
+logger = setup_logger("presegment_worker")
+
+# ─── 임계값 (모듈 상수, env-override 가능, 보수적 = 분할 안 하는 쪽으로 bias) ───
+# MIN_BUNDLE_PAGES: 이 미만이면 번들로 보지 않음(단일문서). 짧은 문서의 우연한 level-1 ToC 보호.
+MIN_BUNDLE_PAGES = int(os.getenv("PRESEGMENT_MIN_BUNDLE_PAGES", "60"))
+# MIN_CHILD_PAGES: 자식 하나라도 이 미만이면 분할 거부(표지/목차만 떼지는 over-split 방지).
+MIN_CHILD_PAGES = int(os.getenv("PRESEGMENT_MIN_CHILD_PAGES", "5"))
+# MAX_CHILDREN: 자식 수 상한. 초과 = ToC 가 챕터/소제목 수준이라 논리문서 경계가 아님 → 분할 거부.
+MAX_CHILDREN = int(os.getenv("PRESEGMENT_MAX_CHILDREN", "50"))
+
+# marker_worker._to_marker_path 와 동일 — NAS 상대경로 → 컨테이너 절대경로 prefix.
+CONTAINER_PATH_PREFIX = os.getenv("MARKER_CONTAINER_PATH_PREFIX", "/documents")
+
+# ─── PR-G2-3 LLM 경계 폴백 (scaffold-first, 기본 OFF) ───
+# PRESEGMENT_LLM_FALLBACK: 기본 "false". OFF 면 deterministic 경로만(=오늘과 동일 — 애매하면
+# 단일문서). ON 이면 deterministic 이 '명확한 번들' 을 못 만든 대형 PDF(page_count >=
+# MIN_BUNDLE_PAGES) 에 한해 off-card Qwen(맥북, 라우터 :8890 경유)에게 경계를 제안받아
+# **동일 검증 게이트(_is_clear_bundle)** 통과 시에만 deterministic 과 같은 자식 생성 경로로 분할.
+# 검증 실패/파싱 실패/is_bundle=false = 단일문서(오늘과 동일) + presegment_llm_rejected 로깅.
+PRESEGMENT_LLM_FALLBACK = os.getenv("PRESEGMENT_LLM_FALLBACK", "false").lower() in (
+    "1", "true", "yes", "on",
+)
+# LLM 에 보내는 per-page 샘플의 page 당 char 상한 (heading/첫줄만 — 본문 미전송).
+PRESEGMENT_LLM_PAGE_CHARS = int(os.getenv("PRESEGMENT_LLM_PAGE_CHARS", "80"))
+# 전체 page-sample 블록의 char 상한 (수 KB 가드 — 초과 시 잘라냄, 본문 누출/페이로드 폭발 방지).
+PRESEGMENT_LLM_SAMPLE_CHARS = int(os.getenv("PRESEGMENT_LLM_SAMPLE_CHARS", "12000"))
+
+# 경계 폴백 프롬프트 (app/prompts/presegment_boundaries.txt). system 지시 + 1-based inclusive·
+# 전범위 커버·무중첩 규칙. {page_count}/{page_samples} 를 str.replace 로 주입.
+_PRESEGMENT_PROMPT_PATH = Path(__file__).parent.parent / "prompts" / "presegment_boundaries.txt"
+
+
+class Segment(BaseModel):
+    """LLM 이 제안하는 1-based inclusive page 범위 한 조각."""
+
+    start_page: int
+    end_page: int
+    title: str | None = None
+
+
+class SegmentationOutput(BaseModel):
+    """presegment_boundaries 응답 스키마. parse_json_response → model_validate."""
+
+    is_bundle: bool = False
+    segments: list[Segment] = []
+    confidence: float | None = None
+
+
+def _resolve_path(file_path: str) -> Path | None:
+    """NFC(DB) vs NFD(NFS) 한글 경로 차이 흡수. thumbnail_worker._resolve_path 와 동일 패턴."""
+    candidates = [
+        file_path,
+        unicodedata.normalize("NFD", file_path),
+        unicodedata.normalize("NFC", file_path),
+    ]
+    for c in candidates:
+        p = Path(c)
+        if p.exists():
+            return p
+    parent = Path(file_path).parent
+    if parent.exists():
+        target = unicodedata.normalize("NFC", Path(file_path).name)
+        for child in parent.iterdir():
+            if unicodedata.normalize("NFC", child.name) == target:
+                return child
+    return None
+
+
+def _to_container_path(file_path: str) -> str:
+    """file_path 를 컨테이너 내부 절대경로로 변환 (marker_worker._to_marker_path 와 동일)."""
+    if file_path.startswith("/"):
+        return file_path
+    return f"{CONTAINER_PATH_PREFIX}/{file_path}"
+
+
+# 후보 A: 자식 합성 file_path 패턴 `{부모경로}#p{start}-{end}` (uq_documents_file_path 유일성).
+_BUNDLE_SUFFIX_RE = re.compile(r"#p\d+-\d+$")
+
+
+def bundle_source_path(file_path: str | None) -> str | None:
+    """자식 합성 file_path → 부모 실파일 경로 복원. 일반 doc(접미사 없음)은 그대로 반환.
+
+    extract_worker/marker_worker 가 자식 처리 시 실제 파일 접근에 사용 (자식 file_path 는
+    합성값이라 디스크에 없음). 결정적·세션 불필요. lineage 가 부모-자식 관계의 정본 기록.
+    """
+    if not file_path:
+        return file_path
+    return _BUNDLE_SUFFIX_RE.sub("", file_path)
+
+
+def _is_pdf(doc: Document) -> bool:
+    """PDF 판정 — file_format=pdf 또는 .pdf 확장자."""
+    fmt = (doc.file_format or "").lower()
+    if fmt == "pdf":
+        return True
+    if doc.file_path:
+        return Path(doc.file_path).suffix.lower() == ".pdf"
+    return False
+
+
+def _level1_segments(toc: list, page_count: int) -> list[dict]:
+    """get_toc(simple=True) 결과에서 level-1 항목만 골라 자식 후보 segment 리스트 생성.
+
+    toc 항목 = [level, title, page] (page 는 1-based). level==1 만 채택.
+    end_page = 다음 level-1 항목의 page - 1, 마지막 = page_count.
+    동일 page 에서 시작하는 level-1 이 여럿이면 정렬 후 인접 항목으로 경계 계산되며,
+    그 경우 0-페이지 segment 가 생겨 후속 검증(MIN_CHILD_PAGES·단조)에서 거부된다.
+    """
+    starts = []
+    for entry in toc:
+        # simple=True 는 [level, title, page]. 방어적으로 길이 체크.
+        if not entry or len(entry) < 3:
+            continue
+        level, title, page = entry[0], entry[1], entry[2]
+        if level != 1:
+            continue
+        # ToC page 가 범위 밖(0/음수/page_count 초과)이면 깨진 ToC → 후속 검증에서 거부됨.
+        starts.append((int(page), (title or "").strip()))
+
+    # ToC 가 정렬돼 있지 않을 수 있으므로 page 기준 정렬(원본 순서 보존 위해 안정 정렬).
+    starts.sort(key=lambda x: x[0])
+
+    segments: list[dict] = []
+    for i, (start_page, title) in enumerate(starts):
+        if i + 1 < len(starts):
+            end_page = starts[i + 1][0] - 1
+        else:
+            end_page = page_count
+        segments.append({"start_page": start_page, "end_page": end_page, "title": title})
+    return segments
+
+
+def _is_clear_bundle(segments: list[dict], page_count: int) -> tuple[bool, str]:
+    """deterministic '명확한 번들' 판정. (clear, reason) 반환.
+
+    clear=True 면 reason="" / clear=False 면 reason 은 거부 사유(로깅용).
+    모든 조건은 보수적 — 하나라도 어긋나면 단일문서로 처리(분할 안 함).
+    """
+    n = len(segments)
+    if n < 2:
+        return False, f"too_few_level1_entries(n={n})"
+    if n > MAX_CHILDREN:
+        return False, f"too_many_children(n={n}>{MAX_CHILDREN})"
+
+    # 첫 segment 가 1페이지에서 시작 + 마지막이 page_count 에서 끝 = 전 범위 커버.
+    if segments[0]["start_page"] != 1:
+        return False, f"first_start_not_1(start={segments[0]['start_page']})"
+    if segments[-1]["end_page"] != page_count:
+        return False, f"last_end_not_page_count(end={segments[-1]['end_page']},pc={page_count})"
+
+    prev_end = 0
+    for seg in segments:
+        start, end = seg["start_page"], seg["end_page"]
+        # 단조 증가 · 비중첩: 각 start 는 직전 end + 1 이어야 빈틈/겹침 없이 [1,pc] 정확 분할.
+        if start != prev_end + 1:
+            return False, f"non_contiguous(start={start},prev_end={prev_end})"
+        if end < start:
+            return False, f"non_monotonic(start={start},end={end})"
+        if (end - start + 1) < MIN_CHILD_PAGES:
+            return False, f"child_too_small(pages={end - start + 1}<{MIN_CHILD_PAGES})"
+        prev_end = end
+
+    if prev_end != page_count:
+        return False, f"coverage_gap(covered={prev_end},pc={page_count})"
+
+    return True, ""
+
+
+def _child_title(parent: Document, seg: dict) -> str:
+    """자식 제목 = 부모 제목 + ' — ' + (segment 제목 또는 page 범위)."""
+    base = (parent.title or "").strip() or (parent.original_filename or "") or "문서"
+    seg_title = (seg.get("title") or "").strip()
+    suffix = seg_title if seg_title else f"p.{seg['start_page']}-{seg['end_page']}"
+    return f"{base} — {suffix}"
+
+
+def _child_file_hash(parent_hash: str, start: int, end: int) -> str:
+    """자식 file_hash = sha256(f"{parent.file_hash}:{start}-{end}"). 결정적 → 재실행 멱등.
+
+    부모 file_hash 가 NULL 일 수는 없으나(NOT NULL) 방어적으로 빈 문자열 처리.
+    """
+    return hashlib.sha256(f"{parent_hash or ''}:{start}-{end}".encode("utf-8")).hexdigest()
+
+
+async def _ensure_child_extract(session: AsyncSession, child_id: int) -> None:
+    """자식이 아직 extract 안 됐으면 extract enqueue (멱등 수렴 경로).
+
+    이미 extracted_text 가 채워졌거나 활성 큐 행이 있으면 enqueue_stage 가 no-op/skip.
+    """
+    child = await session.get(Document, child_id)
+    if child is None:
+        return
+    # 이미 추출 완료면 재enqueue 불필요 (큐 중복은 enqueue_stage 가 막지만 의미상으로도 skip).
+    if child.extracted_at is not None and child.extracted_text is not None:
+        return
+    await enqueue_stage(session, child_id, "extract")
+
+
+async def _create_children(
+    doc: Document, segments: list[dict], session: AsyncSession
+) -> int:
+    """검증된 segments 로 자식 N개 생성 + lineage + extract enqueue + 부모 표식 (멱등).
+
+    deterministic '명확한 번들' 경로와 LLM 폴백 경로가 공유하는 단일 자식 생성 경로.
+    호출 전 segments 는 반드시 _is_clear_bundle 검증을 통과해야 한다(여기선 재검증 X).
+    commit 까지 수행. 반환값 = 실제 생성한 자식 수(이미 존재해 수렴만 한 경우 0).
+    """
+    # ─── 멱등 체크: 이미 자식이 있으면 수렴만 (재생성 금지) ───
+    existing_children = (
+        await session.execute(
+            select(DocumentLineage.derived_document_id).where(
+                DocumentLineage.source_document_id == doc.id,
+                DocumentLineage.relation_type == "segmented_from",
+            )
+        )
+    ).scalars().all()
+
+    if existing_children:
+        # 부모 표식이 누락된 경우 보정(이전 부분실패 복구).
+        if doc.presegment_role != "parent":
+            doc.presegment_role = "parent"
+        for child_id in existing_children:
+            await _ensure_child_extract(session, child_id)
+        await session.commit()
+        logger.info(
+            f"[presegment] id={doc.id} children already exist "
+            f"(n={len(existing_children)}) → converge(ensure extract), no re-create"
+        )
+        return 0
+
+    # ─── 자식 N개 생성 + lineage + extract enqueue ───
+    created_ids: list[int] = []
+    for seg in segments:
+        start, end = seg["start_page"], seg["end_page"]
+        child = Document(
+            # 후보 A: 자식 file_path = unique 합성값 `{부모경로}#p{s}-{e}` (uq_documents_file_path
+            # 충돌 회피). 실파일은 bundle_source_path() 로 복원(부모 경로). 물리 분할 없음 —
+            # 자식은 bundle_page_start/end 로 부모 파일을 슬라이스.
+            file_path=f"{doc.file_path}#p{start}-{end}",
+            file_hash=_child_file_hash(doc.file_hash, start, end),
+            file_format=doc.file_format,
+            file_size=doc.file_size,
+            file_type=doc.file_type,
+            import_source=doc.import_source,
+            original_filename=doc.original_filename,
+            source_channel=doc.source_channel,
+            category=doc.category,
+            data_origin=doc.data_origin,
+            doc_purpose=doc.doc_purpose,
+            # 안전 자료실 축은 부모에서 상속(분할이 자료유형/관할을 바꾸지 않음).
+            material_type=doc.material_type,
+            jurisdiction=doc.jurisdiction,
+            title=_child_title(doc, seg),
+            bundle_page_start=start,
+            bundle_page_end=end,
+            presegment_role="child",
+        )
+        session.add(child)
+        await session.flush()  # child.id 확보
+        created_ids.append(child.id)
+
+        session.add(
+            DocumentLineage(
+                source_document_id=doc.id,
+                derived_document_id=child.id,
+                relation_type="segmented_from",
+                meta={"start_page": start, "end_page": end},
+            )
+        )
+        # 자식 extract 는 워커가 직접 enqueue (부모는 'parent' 라 extract 로 흐르지 않음).
+        await enqueue_stage(session, child.id, "extract")
+
+    # 부모 = 파일 홀더. presegment→extract 전이는 enqueue_next_stage 가 'parent' 면 억제.
+    doc.presegment_role = "parent"
+    await session.commit()
+
+    logger.info(
+        f"[presegment] id={doc.id} SPLIT into {len(created_ids)} children "
+        f"child_ids={created_ids}"
+    )
+    return len(created_ids)
+
+
+def _segments_from_output(out: "SegmentationOutput") -> list[dict]:
+    """SegmentationOutput.segments(Pydantic) → _is_clear_bundle / _create_children 가 쓰는 dict 형태."""
+    return [
+        {"start_page": s.start_page, "end_page": s.end_page, "title": (s.title or "")}
+        for s in out.segments
+    ]
+
+
+def _page_samples(pdf, page_count: int) -> str:
+    """LLM 입력용 compact per-page 샘플 — page 당 heading/첫줄만(`p{n}: {firstline}`).
+
+    PyMuPDF page.get_text() 로 page 별 텍스트를 스트리밍하되 page 당 첫 비공백 줄만,
+    PRESEGMENT_LLM_PAGE_CHARS 로 잘라 본문 누출 차단. 전체 블록은 PRESEGMENT_LLM_SAMPLE_CHARS
+    가드로 상한(수 KB) — 초과 시 그 지점에서 중단(앞쪽 페이지 우선 보존).
+    """
+    lines: list[str] = []
+    total = 0
+    for i in range(page_count):
+        try:
+            text = pdf[i].get_text() or ""
+        except Exception:
+            text = ""
+        first = ""
+        for ln in text.splitlines():
+            ln = ln.strip()
+            if ln:
+                first = ln
+                break
+        first = first[:PRESEGMENT_LLM_PAGE_CHARS]
+        entry = f"p{i + 1}: {first}"
+        if total + len(entry) + 1 > PRESEGMENT_LLM_SAMPLE_CHARS:
+            break
+        lines.append(entry)
+        total += len(entry) + 1
+    return "\n".join(lines)
+
+
+async def _llm_boundary_fallback(
+    doc: Document, source: Path, page_count: int, session: AsyncSession
+) -> bool:
+    """애매 + 대형(ToC-less 등) PDF 에 대해 off-card Qwen 으로 경계 제안 → 검증 → 분할.
+
+    반환 True = LLM 경로가 분할을 수행(또는 멱등 수렴)했으므로 호출자는 추가 처리 없이 return.
+    반환 False = is_bundle=false / 파싱 실패 / 검증 실패 → 호출자는 단일문서(오늘과 동일) 처리.
+    맥북 불가(503/연결/절단)는 call_deep_or_defer 가 StageDeferred 로 raise → 큐 재시도(백오프).
+    silent fallback 금지 — deep 슬롯 외 다른 backend 자동 호출 안 함.
+    """
+    import fitz  # PyMuPDF — deterministic 경로와 동일 의존
+
+    # per-page 샘플은 파일을 다시 열어 스트리밍(deterministic with 블록과 분리해 그 경로 무회귀).
+    try:
+        with fitz.open(str(source)) as pdf:
+            samples = _page_samples(pdf, page_count)
+    except Exception as exc:
+        logger.warning(
+            f"[presegment] id={doc.id} llm fallback sample 실패 "
+            f"({type(exc).__name__}: {exc}) → single doc(extract)"
+        )
+        return False
+
+    try:
+        template = _PRESEGMENT_PROMPT_PATH.read_text(encoding="utf-8")
+    except Exception as exc:
+        logger.warning(
+            f"[presegment] id={doc.id} prompt 로드 실패 ({type(exc).__name__}: {exc}) "
+            f"→ single doc(extract)"
+        )
+        return False
+
+    prompt = template.replace("{page_count}", str(page_count)).replace(
+        "{page_samples}", samples
+    )
+
+    # off-card 호출 — call_deep_or_defer 가 deep 슬롯(맥북, 라우터 :8890, model=qwen-macbook)
+    # 으로 라우팅. 맥북 불가는 StageDeferred 로 전파(여기서 잡지 않음 → 큐가 보류/백오프).
+    # classify_worker 와 동일하게 AIClient() 인스턴스화.
+    client = AIClient()
+    try:
+        raw = await call_deep_or_defer(client, prompt)
+    finally:
+        await client.close()
+
+    parsed = parse_json_response(raw)
+    if not parsed:
+        logger.info(
+            f"[presegment] presegment_llm_rejected id={doc.id} "
+            f"reason=parse_failed raw={raw[:160]!r} → single doc(extract)"
+        )
+        return False
+
+    try:
+        out = SegmentationOutput.model_validate(parsed)
+    except (ValidationError, ValueError, TypeError) as exc:
+        logger.info(
+            f"[presegment] presegment_llm_rejected id={doc.id} "
+            f"reason=schema_invalid({type(exc).__name__}) → single doc(extract)"
+        )
+        return False
+
+    if not out.is_bundle:
+        logger.info(
+            f"[presegment] presegment_llm_rejected id={doc.id} "
+            f"reason=is_bundle_false → single doc(extract)"
+        )
+        return False
+
+    segments = _segments_from_output(out)
+    clear, reason = _is_clear_bundle(segments, page_count)
+    if not clear:
+        # LLM 출력을 그대로 믿지 않음 — deterministic 과 동일 게이트 미달이면 단일문서.
+        logger.info(
+            f"[presegment] presegment_llm_rejected id={doc.id} "
+            f"reason={reason} n={len(segments)} pages={page_count} → single doc(extract)"
+        )
+        return False
+
+    n = await _create_children(doc, segments, session)
+    logger.info(
+        f"[presegment] id={doc.id} LLM-SPLIT accepted "
+        f"(pages={page_count} n={len(segments)} created={n} "
+        f"confidence={out.confidence})"
+    )
+    return True
+
+
+async def process(document_id: int, session: AsyncSession) -> None:
+    """presegment stage 워커 진입점. queue_consumer 가 호출.
+
+    전 문서가 진입하며, 非PDF·단일문서는 변경 없이 통과(presegment_role 그대로 NULL) → extract 로 흐른다.
+    '명확한 번들' PDF 만 자식 분할 + 부모를 'parent' 로 표식(이 경우 부모는 extract 로 흐르지 않음).
+    """
+    doc = await session.get(Document, document_id)
+    if doc is None:
+        logger.warning(f"[presegment] document {document_id} not found")
+        return
+
+    # ─── (0) 非PDF — fast-exit. presegment_role 그대로 NULL → enqueue_next_stage 가 extract 로 흘림 ───
+    if not _is_pdf(doc):
+        logger.info(f"[presegment] id={document_id} non-pdf (fmt={doc.file_format}) → extract")
+        return
+
+    # ─── (0.5) file_path 없음(예: note) — 분할 불가, 단일문서로 통과 ───
+    if not doc.file_path:
+        logger.info(f"[presegment] id={document_id} no file_path → extract")
+        return
+
+    # ─── (1) 이미 분할된 자식 자신이 presegment 로 다시 들어온 경우 — 재분할 금지 ───
+    # (정상 흐름에선 자식은 곧장 extract 로 enqueue 되지만, 재처리 스크립트 등으로 들어올 수 있음.)
+    if doc.presegment_role in ("child", "parent"):
+        logger.info(
+            f"[presegment] id={document_id} already presegment_role={doc.presegment_role} → skip"
+        )
+        return
+
+    # ─── (2) 파일 열기 + page_count ───
+    raw = str(Path(settings.nas_mount_path) / doc.file_path)
+    source = _resolve_path(raw)
+    if source is None:
+        # 파일 부재 = extract 가 동일 상황에서 FileNotFoundError 로 처리할 사안.
+        # presegment 는 분할 불가일 뿐이므로 단일문서로 통과시켜 extract 가 일관되게 처리하게 둔다.
+        logger.warning(f"[presegment] id={document_id} file not found ({raw}) → extract")
+        return
+
+    import fitz  # PyMuPDF — extract_worker/marker_worker 와 동일 의존
+
+    try:
+        with fitz.open(str(source)) as pdf:
+            page_count = pdf.page_count
+            toc = pdf.get_toc(simple=True) or []
+    except Exception as exc:
+        # PDF 손상 등 — 분할 불가. 단일문서로 통과(extract 가 PyMuPDF/OCR 로 재시도하며 가시화).
+        logger.warning(
+            f"[presegment] id={document_id} fitz open/toc failed "
+            f"({type(exc).__name__}: {exc}) → extract"
+        )
+        return
+
+    # ─── (3) page_count 가 임계 미만 = 단일문서 (대다수 경로) ───
+    if page_count < MIN_BUNDLE_PAGES:
+        logger.info(
+            f"[presegment] id={document_id} single doc "
+            f"(pages={page_count}<{MIN_BUNDLE_PAGES}) → extract"
+        )
+        return
+
+    # ─── (4) level-1 ToC → 자식 후보 segment ───
+    segments = _level1_segments(toc, page_count)
+
+    if not segments:
+        # 큰 PDF 인데 ToC 없음/level-1 없음 = 애매. flag ON 이면 LLM 경계 폴백(PR-G2-3),
+        # OFF(기본) 이면 오늘과 동일 — 단일문서로 처리하고 사유를 남긴다.
+        if PRESEGMENT_LLM_FALLBACK:
+            logger.info(
+                f"[presegment] presegment_ambiguous id={document_id} "
+                f"reason=no_level1_toc pages={page_count} → LLM fallback"
+            )
+            if await _llm_boundary_fallback(doc, source, page_count, session):
+                return
+            # LLM 이 분할하지 않음(is_bundle=false / 검증·파싱 실패) — 단일문서.
+            return
+        logger.info(
+            f"[presegment] presegment_ambiguous id={document_id} "
+            f"reason=no_level1_toc pages={page_count} → single doc(extract)"
+        )
+        return
+
+    clear, reason = _is_clear_bundle(segments, page_count)
+    if not clear:
+        # 큰 PDF + ToC 는 있으나 '명확한 번들' 기준 미달 = 애매. flag ON 이면 LLM 경계 폴백,
+        # OFF(기본) 이면 오늘과 동일 — 단일문서(분할 안 함).
+        if PRESEGMENT_LLM_FALLBACK:
+            logger.info(
+                f"[presegment] presegment_ambiguous id={document_id} "
+                f"reason={reason} pages={page_count} level1={len(segments)} → LLM fallback"
+            )
+            if await _llm_boundary_fallback(doc, source, page_count, session):
+                return
+            return
+        logger.info(
+            f"[presegment] presegment_ambiguous id={document_id} "
+            f"reason={reason} pages={page_count} level1={len(segments)} → single doc(extract)"
+        )
+        return
+
+    # ─── (5) 명확한 번들 (deterministic) — 공유 자식 생성 경로 (멱등 수렴 포함) ───
+    await _create_children(doc, segments, session)
@@ -31,9 +31,9 @@ _hold_logged = False
 # embed/chunk 1→10 (2026-06-12 fast-consumer): 건당 <1s 실측 — Phase 0.1 초기 보수값이
 # LLM 사이클에 인질로 잡혀 실효 ~580/일 vs 수요 최대 2,700/일 → 적체 원인이었음.
 # 10 = TEI/marker 와 GPU 공유 고려한 보수 상향(전용 1분 잡 기준 캡 ~14,400/일).
-BATCH_SIZE = {"extract": 5, "classify": 3, "summarize": 3, "embed": 10, "chunk": 10,
-              "preview": 2, "stt": 1, "thumbnail": 3, "deep_summary": 1, "markdown": 1,
-              "fulltext": 3}
+BATCH_SIZE = {"presegment": 3, "extract": 5, "classify": 3, "summarize": 3, "embed": 10,
+              "chunk": 10, "preview": 2, "stt": 1, "thumbnail": 3, "deep_summary": 1,
+              "markdown": 1, "fulltext": 3}
 STALE_THRESHOLD_MINUTES = 10
 # markdown 대형 split 변환은 한 doc 이 수십 분(5210 ≈ 40분) 동안 processing 상태로 머문다.
 # marker_worker 는 queue 행에 heartbeat 를 찍지 않으므로(started_at 고정), main 의 10분
@@ -46,7 +46,7 @@ MARKDOWN_STALE_THRESHOLD_MINUTES = int(os.getenv("MARKDOWN_STALE_MINUTES", "120"
 # (reset_stale_items 가 자기 집합만 reset, 교차 시 이중 복구 위험).
 # STT 도 장기 작업 가능성이 있으나 본 PR 범위 밖 — main 에 유지(follow-up).
 MAIN_QUEUE_STAGES = [
-    "extract", "classify", "summarize",
+    "presegment", "extract", "classify", "summarize",
    "preview", "stt", "thumbnail", "fulltext",
 ]
 MARKDOWN_QUEUE_STAGES = ["markdown"]
@@ -165,6 +165,10 @@ async def enqueue_next_stage(document_id: int, current_stage: str):
    }

    next_stages = {
+        # G2 (PR-G2-2): 전 문서가 presegment → extract. 단, 번들 분할로 'parent' 가 된 문서는
+        # 파일 홀더라 자체 extract 안 함 — 아래 suppression 으로 이 전이를 건너뛴다(자식 extract 는
+        # presegment_worker 가 직접 enqueue). 단일/非PDF 문서(role NULL)는 정상적으로 extract 로 흐름.
+        "presegment": ["extract"],
        "extract": ["classify", "preview"],
        "classify": ["embed", "chunk", "markdown"],
        "stt": ["classify"],
@@ -180,6 +184,18 @@ async def enqueue_next_stage(document_id: int, current_stage: str):
            stages = extract_override_by_channel[sc]
        else:
            stages = next_stages.get(current_stage, [])
+    elif current_stage == "presegment":
+        # 번들 분할 parent 는 extract 로 흐르지 않게 억제 (자식이 부모 extract 에 가려지는 것 방지).
+        # role NULL(단일/非PDF) / 'child' 는 정상 전이. presegment_worker 가 자식 extract 를 직접
+        # enqueue 하므로 'parent' 만 여기서 no-op.
+        from models.document import Document
+        async with async_session() as lookup_session:
+            doc = await lookup_session.get(Document, document_id)
+            role = doc.presegment_role if doc else None
+        if role == "parent":
+            stages = []
+        else:
+            stages = next_stages.get(current_stage, [])
    else:
        stages = next_stages.get(current_stage, [])

@@ -199,6 +215,7 @@ def _load_workers():
    from workers.deep_summary_worker import process as deep_summary_process
    from workers.embed_worker import process as embed_process
    from workers.extract_worker import process as extract_process
+    from workers.presegment_worker import process as presegment_process
    from workers.preview_worker import process as preview_process
    from workers.stt_worker import process as stt_process
    from workers.summarize_worker import process as summarize_process
@@ -207,6 +224,8 @@ def _load_workers():
    from workers.fulltext_worker import process as fulltext_process

    return {
+        # G2 (PR-G2-2): extract 前 번들 PDF → N 자식 분할 (deterministic ToC). 非PDF/단일은 통과.
+        "presegment": presegment_process,
        "extract": extract_process,
        "classify": classify_process,
        "summarize": summarize_process,
@@ -25,6 +25,7 @@ import httpx
 from sqlalchemy.ext.asyncio import AsyncSession

 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_question import StudyQuestion
 from models.study_question_job import StudyQuestionJob
 from services.search.llm_gate import Priority, acquire_mlx_gate
@@ -32,11 +33,12 @@ from services.study.explanation_rag import (
    gather_explanation_context,
    render_evidence_block,
 )
+from services.study.publish_enqueue import enqueue_question_publish

 logger = logging.getLogger(__name__)

-# PR-3 LLM_TIMEOUT_S 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려)
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준, Qwen 27B 교체 sweep 누락).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s

 # explanation_md hard cap — 운영 데이터 793/838/866자 사례에서 1200 으로 시작
 # (800 은 공식·오답·핵심개념 묶이는 기사시험 풀이에 빡빡함). 1차 운영 후 조정.
@@ -226,6 +228,10 @@ async def run_explanation_job(session: AsyncSession, job: StudyQuestionJob) -> N
        question.ai_explanation_model = f"mlx:{primary_name}"
        question.updated_at = question.ai_explanation_generated_at

+        # 발행 재투영(같은 tx, caller commit) — 4-A 해설 ready → 문항+해설 발행. P0-1b.
+        if settings.study_publish_enabled:
+            await enqueue_question_publish(session, question)
+
        job.status = "completed"
        job.completed_at = now()
        return
@@ -24,6 +24,7 @@ import httpx
 from sqlalchemy.ext.asyncio import AsyncSession

 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_memo_card import (
    append_card,
    append_card_evidence,
@@ -33,6 +34,8 @@ from models.study_memo_card_job import StudyMemoCardJob
 from models.study_question import StudyQuestion
 from models.user import User  # noqa: F401  (mapper 초기화 defensive)
 from services.search.llm_gate import Priority, acquire_mlx_gate
+from services.study.publish_enqueue import enqueue_publish
+from services.study.publish_projection import KIND_CARD
 from services.study.explanation_rag import (
    gather_explanation_context,
    render_evidence_block,
@@ -41,8 +44,8 @@ from services.study.study_memo_card_guards import guard_cards

 logger = logging.getLogger("study_memo_card_worker")

-# 다카드 출력이라 explanation(30s)보다 여유. config primary.timeout(180, soft-lock)은 미변경.
-CARD_LLM_TIMEOUT_S = 45.0
+# 2026-06-20: config 단일소스 (구 하드코딩 45s = 빠른 Gemma 기준).
+CARD_LLM_TIMEOUT_S = settings.llm_call_timeout_s
 SOURCE_KIND_QUESTION = "question"

 _ENVELOPE_PROMPT_FILE = "study_card_envelope.txt"
@@ -183,9 +186,13 @@ async def run_card_extract_job(session: AsyncSession, job: StudyMemoCardJob) ->
            return

        # 5. 성공 — 구버전 카드 retire 후 append (dedup partial unique 충돌 회피).
-        await supersede_old_cards(
+        retired_published_ids = await supersede_old_cards(
            session, source_question_id=question.id, keep_generated_at=source_version
        )
+        # 발행 중이던 구버전 카드 tombstone(같은 tx) — 재추출 retire 후 viewer stale 잔류 0. S-2.
+        if settings.study_publish_enabled:
+            for cid in retired_published_ids:
+                await enqueue_publish(session, kind=KIND_CARD, source_id=cid, payload=None, deleted=True)
        model_name = f"mlx:{primary_name}"
        inserted = 0
        for g in guarded:
@@ -0,0 +1,120 @@
+"""발행 워커 — publish_outbox drain → published 에 rev 부여 (docsrv-viewer-publish).
+
+APScheduler 1분(max_instances=1). pg_advisory_xact_lock 단일 라이터 → rev 커밋순 gapless
+(인플라이트 갭 차단: bigserial seq 폴링이 아니라 outbox id 순 + 단일 라이터 rev 부여).
+  outbox 를 id(커밋순) 순으로 처리, (kind, source_id) 당 published upsert:
+    - 기존 행과 (payload_hash, deleted) 동일 → no-op(디둡, rev 안 올림) + processed 마킹
+    - 그 외 → pub_id 재사용(기존)|신규 uuid, rev = MAX(rev)+1, payload/hash/deleted 갱신
+  tombstone(deleted=True)은 디둡 복합키라 안 삼켜짐. 배치 단일 트랜잭션.
+  배치 내 같은 (kind, source_id) 가 두 번 오면 flush 로 직전 반영을 다음 select 가 보게 함(최신 승).
+
+study_publish_enabled=False(기본) 면 no-op — 저자/4-A enqueue 결선(P0-1b) 전까지 inert.
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+
+from sqlalchemy import func, select, text
+
+from core.config import settings
+from core.database import async_session
+from core.utils import setup_logger
+from models.published import Published, PublishOutbox
+
+logger = setup_logger("study_publish_worker")
+
+BATCH_SIZE = 500
+# pg_advisory_xact_lock 전역 단일 라이터 키(발행 워커 전용 임의 상수, 타 advisory 락과 비충돌).
+ADVISORY_LOCK_KEY = 838201
+
+
+async def consume_publish_outbox() -> None:
+    """APScheduler 진입점. 미처리 outbox 를 rev 부여하며 published 로 반영."""
+    if not settings.study_publish_enabled:
+        logger.debug("study_publish 비활성 (study_publish_enabled=false)")
+        return
+
+    async with async_session() as session:
+        try:
+            # 1) 전역 단일 라이터 락(트랜잭션 스코프 — commit/rollback 시 자동 해제).
+            await session.execute(
+                text("SELECT pg_advisory_xact_lock(:k)").bindparams(k=ADVISORY_LOCK_KEY)
+            )
+            # 2) 현재 최대 rev.
+            max_rev = int(
+                (await session.execute(select(func.coalesce(func.max(Published.rev), 0)))).scalar() or 0
+            )
+            # 3) 미처리 outbox 를 커밋순(id)으로.
+            rows = (
+                await session.execute(
+                    select(PublishOutbox)
+                    .where(PublishOutbox.processed_at.is_(None))
+                    .order_by(PublishOutbox.id.asc())
+                    .limit(BATCH_SIZE)
+                )
+            ).scalars().all()
+            if not rows:
+                return
+
+            now = datetime.now(timezone.utc)
+            published_count = 0
+            for ob in rows:
+                existing = (
+                    await session.execute(
+                        select(Published).where(
+                            Published.kind == ob.kind,
+                            Published.source_id == ob.source_id,
+                        )
+                    )
+                ).scalar_one_or_none()
+
+                # (payload_hash, deleted) 디둡 — no-op 재투영은 rev 안 올림.
+                if (
+                    existing is not None
+                    and existing.payload_hash == ob.payload_hash
+                    and existing.deleted == ob.deleted
+                ):
+                    ob.processed_at = now
+                    continue
+
+                max_rev += 1
+                if existing is None:
+                    session.add(
+                        Published(
+                            kind=ob.kind,
+                            source_id=ob.source_id,
+                            pub_id=uuid.uuid4().hex,
+                            payload=ob.payload,
+                            payload_hash=ob.payload_hash,
+                            schema_version=ob.schema_version,
+                            rev=max_rev,
+                            deleted=ob.deleted,
+                            created_at=now,
+                            updated_at=now,
+                        )
+                    )
+                else:
+                    existing.payload = ob.payload
+                    existing.payload_hash = ob.payload_hash
+                    existing.schema_version = ob.schema_version
+                    existing.deleted = ob.deleted
+                    existing.rev = max_rev
+                    existing.updated_at = now
+
+                ob.processed_at = now
+                # 배치 내 동일 (kind, source_id) 후속 행이 직전 반영을 보도록 flush(최신 승).
+                await session.flush()
+                published_count += 1
+
+            await session.commit()
+            logger.info(
+                "publish_outbox_drained scanned=%s published=%s max_rev=%s",
+                len(rows),
+                published_count,
+                max_rev,
+            )
+        except Exception as e:
+            await session.rollback()
+            logger.exception("publish_outbox_drain_failed: %s", e)
@@ -28,6 +28,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.ext.asyncio import AsyncSession

 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_question import StudyQuestion, StudyQuestionAttempt
 from models.study_quiz_session import StudyQuizSession
 from models.study_quiz_session_analysis import StudyQuizSessionAnalysis
@@ -42,8 +43,8 @@ from services.study.session_summary_rag import gather_session_summary_context

 logger = logging.getLogger(__name__)

-# 4-A 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려)
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s
 # wrong/unsure 5 미만은 분석 의미 X — insufficient_attempts skip
 MIN_ATTEMPTS_FOR_ANALYSIS = 5
 # 큰 세션 (84건 등) 에서 prompt 과대 + LLM timeout 방어. 가장 최근 attempt 기준 cap.
@@ -91,7 +91,12 @@ async def process(document_id: int, session: AsyncSession, *, use_deep: bool = F

        # sleep-안전 불변식: 쓰기는 전체 완주 후에만 — 중간 절단은 StageDeferred 로 빠져
        # 이 지점에 도달하지 않는다 (carry 는 로컬 변수, doc 무변경).
-        doc.ai_summary = strip_thinking(summary)
+        final_summary = strip_thinking(summary)
+        # 2026-06-20 H2: 빈/think-only 요약을 ai_summary 빈문자열로 박제 → completed 마크 → briefing/digest 누출.
+        # raise → queue 재시도 후 failed(가시화). 기존 raise 계약(not-found·empty-text)과 동형.
+        if not final_summary.strip():
+            raise ValueError(f"empty ai_summary after strip (document_id={document_id})")
+        doc.ai_summary = final_summary
        doc.ai_model_version = used_cfg.model
        doc.ai_processed_at = datetime.now(timezone.utc)
        logger.info(
@@ -1,8 +1,6 @@
 # hyungi_Document_Server 설정

 ai:
-  gateway:
-    endpoint: "http://ai-gateway:8080"

  models:
    # ─── 단일 generation 호스트 routing (2026-05-14 GPU LLM 제거) ───
@@ -29,6 +27,8 @@ ai:
      context_char_limit: 260000
      temperature: 0.3
      top_p: 0.9
+      repetition_penalty: 1.05  # 한국어 장문 반복/코드스위칭(CJK·라틴 누수) 억제 (보수적 시작값)
+      top_k: 20                 # Qwen3 권장

    # deep: 야간 night-drain 전용 — 맥북 M5 Max Qwen3.6-27B-6bit (llm-router :8890 경유,
    # model=qwen-macbook alias). 2026-06-11 재도입 (사용자: 자기 전 night-drain 으로 백로그 분담).
@@ -43,6 +43,8 @@ ai:
      context_char_limit: 260000
      temperature: 0.3
      top_p: 0.9
+      repetition_penalty: 1.05  # 한국어 장문 반복/코드스위칭 억제 (보수적 시작값)
+      top_k: 20

    # fallback: primary 장애 시 최후 방어선. Claude Sonnet 4 API (소액 한도, 자동 trigger).
    # 호출 빈도 낮음 가정 (Mac mini 가 거의 항상 up) → premium 과 budget 공유 OK.
@@ -210,3 +212,6 @@ pipeline:
  digest_llm_timeout_s: 300
  digest_llm_attempts: 2
  digest_pipeline_hard_cap_s: 5400
+  # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준).
+  # Qwen 27B(콜당 ~40~150s)에 맞춰 단일소스화 — 구 30s 즉사 = 사용자 504 + 워커 영구 재시도.
+  llm_call_timeout_s: 300
@@ -1,135 +0,0 @@
-# Phase 2A — Embedding candidate compose override (Diagnose only)
-#
-# Profile-isolated: `--profile embed-cand` 명시 opt-in. default up 시 미기동.
-# production fastapi/postgres/reranker 에 영향 0.
-# 본 PR 종료 시 별 chore (PR-2A-Chunks-Cand-Cleanup-1) 에서 제거.
-#
-# 후보 상태 (2026-05-23):
-#   - me5_large_inst : ✅ smoke PASS (dim 1024)
-#   - bge_mgemma2    : ❌ Phase 2A-Extended 별 PR 이관 (9B FP16 → VRAM OOM risk + 다운로드 cost)
-#   - me5_ko         : ❌ 폐기 (401 Unauthorized, gated/모델명 부정확)
-#   - snowflake_l_v2 : 신규 추가 (Snowflake/snowflake-arctic-embed-l-v2.0, 2024-12, multilingual 강화)
-#
-# 사용:
-#   docker compose -f docker-compose.yml -f docker-compose.override.cand.yml \
-#     --profile embed-cand up -d embedding-cand-me5-inst
-#
-# 호출 (DS network 내부):
-#   http://embedding-cand-me5-inst:80/embed
-#   http://embedding-cand-snowflake-l-v2:80/embed
-
-services:
-  embedding-cand-me5-inst:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    restart: unless-stopped
-    container_name: hyungi_document_server-embedding-cand-me5-inst-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=intfloat/multilingual-e5-large-instruct
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - embedding_cand_me5_inst_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 60s
-    profiles: ["embed-cand"]
-
-  embedding-cand-snowflake-l-v2:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    restart: unless-stopped
-    container_name: hyungi_document_server-embedding-cand-snowflake-l-v2-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=Snowflake/snowflake-arctic-embed-l-v2.0
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - embedding_cand_snowflake_l_v2_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 60s
-    profiles: ["embed-cand"]
-
-  # ===== 비활성 후보 (Phase 2A-Extended 별 PR 이관 또는 폐기) =====
-  # 진단 박제만 보존. 본 PR scope 외.
-
-  embedding-cand-bge-mgemma2:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    container_name: hyungi_document_server-embedding-cand-bge-mgemma2-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=BAAI/bge-multilingual-gemma2
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - embedding_cand_bge_mgemma2_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 300s
-    profiles: ["embed-cand-extended"]   # 본 PR 미사용. extended 별 profile.
-
-  embedding-cand-me5-ko:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    container_name: hyungi_document_server-embedding-cand-me5-ko-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=dragonkue/multilingual-e5-large-ko
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - embedding_cand_me5_ko_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 60s
-    profiles: ["embed-cand-disabled"]   # 401 fail. 사용 X.
-
-volumes:
-  embedding_cand_me5_inst_cache:
-  embedding_cand_snowflake_l_v2_cache:
-  embedding_cand_bge_mgemma2_cache:
-  embedding_cand_me5_ko_cache:
@@ -1,101 +0,0 @@
-# Phase 2B — Reranker candidate compose override (Diagnose only)
-#
-# Profile-isolated: `--profile rerank-cand` 명시 opt-in. default up 시 미기동.
-# production fastapi/postgres/reranker(bge-reranker-v2-m3) 에 영향 0.
-# 본 PR 종료 후 별 chore (PR-2B-Rerank-Cand-Cleanup-1) 에서 제거.
-#
-# 후보 상태 (2026-05-23):
-#   - gte_ml_base       : Apache 2.0, 305M, smoke 대기
-#   - mxbai_large       : Apache 2.0, ~435M, safetensors 부재 — TEI smoke risk
-#   - bge_v2_gemma_2b   : Gemma 라이센스, 2.5B FP16 ~5GB, smoke 대기
-#
-# 사용:
-#   docker compose -f docker-compose.yml -f docker-compose.override.rerank-cand.yml \
-#     --profile rerank-cand up -d rerank-cand-gte-ml-base
-
-services:
-  rerank-cand-gte-ml-base:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    restart: unless-stopped
-    container_name: hyungi_document_server-rerank-cand-gte-ml-base-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=Alibaba-NLP/gte-multilingual-reranker-base
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - rerank_cand_gte_ml_base_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 60s
-    profiles: ["rerank-cand"]
-
-  rerank-cand-mxbai-large:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    restart: unless-stopped
-    container_name: hyungi_document_server-rerank-cand-mxbai-large-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=mixedbread-ai/mxbai-rerank-large-v1
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=4
-    volumes:
-      - rerank_cand_mxbai_large_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 60s
-    profiles: ["rerank-cand"]
-
-  rerank-cand-bge-v2-gemma-2b:
-    image: ghcr.io/huggingface/text-embeddings-inference:1.7
-    restart: unless-stopped
-    container_name: hyungi_document_server-rerank-cand-bge-v2-gemma-2b-1
-    expose:
-      - "80"
-    environment:
-      - MODEL_ID=BAAI/bge-reranker-v2-gemma
-      - MAX_BATCH_TOKENS=8192
-      - MAX_CONCURRENT_REQUESTS=2
-    volumes:
-      - rerank_cand_bge_v2_gemma_2b_cache:/data
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 120s
-    profiles: ["rerank-cand"]
-
-volumes:
-  rerank_cand_gte_ml_base_cache:
-  rerank_cand_mxbai_large_cache:
-  rerank_cand_bge_v2_gemma_2b_cache:
@@ -16,6 +16,8 @@ services:
      timeout: 5s
      retries: 5
    restart: unless-stopped
+    # 2026-06-20 tier-0 무장: 글로벌 OOM 시 커널이 postgres(prod DB)를 reap 하지 않도록.
+    oom_score_adj: -900

  kordoc-service:
    build: ./services/kordoc
@@ -54,24 +56,28 @@ services:
      start_period: 180s
    restart: unless-stopped

-  # Phase 1B (2026-05-01): PDF → markdown 변환. ocr-service 와 별도 컨테이너 (deps 충돌 회피).
-  marker-service:
-    build: ./services/marker
+  # MinerU 2.5 VLM PDF→markdown 추출 — ★ marker-service 대체(컷오버 2026-06-18, A/B 8/8 PASS).
+  # 단일카드 markdown VRAM ~10GB(marker)→~5.9GB 고정. fastapi 가 MARKER_ENDPOINT 로 호출.
+  # 동기 do_parse 버그 회피 위해 server.py 는 async aio_do_parse 사용. 포트 3301.
+  mineru-service:
+    build: ./services/mineru
+    mem_limit: 16g  # 2026-06-20: VLM 스파이크 봉쇄 (steady ~12GB) — 호스트 30GB 글로벌 OOM 차단
    ports:
-      - "127.0.0.1:3300:3300"
+      - "127.0.0.1:3301:3301"
    expose:
-      - "3300"
+      - "3301"
    environment:
-      - HF_HOME=/models/huggingface
-      - TORCH_HOME=/models/torch
-      # D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~3.5GB) 해제가 90% 봉투의 전제.
-      # /ready 는 idle 에서도 200 (fastapi depends_on service_healthy 유지).
-      # 롤백 = MARKER_PRELOAD=1 + MARKER_IDLE_UNLOAD_MINUTES=0.
-      - MARKER_PRELOAD=0
-      - MARKER_IDLE_UNLOAD_MINUTES=${MARKER_IDLE_UNLOAD_MINUTES:-30}
+      # vlm-engine = 순수 VLM 단일모델. 기본 hybrid-engine 은 다중모델 로드 = OOM(반드시 명시).
+      - MINERU_BACKEND=vlm-engine
+      - MINERU_LANG=${MINERU_LANG:-korean}
+      # 공유 16GB 카드 공존: 절대 VRAM 캡(GB, 공유카드 robust) + vLLM 분율 캡 병용.
+      - MINERU_VIRTUAL_VRAM_SIZE=${MINERU_VIRTUAL_VRAM_SIZE:-6}
+      - MINERU_GPU_MEMORY_UTILIZATION=${MINERU_GPU_MEMORY_UTILIZATION:-0.40}
+      - MINERU_PRELOAD=${MINERU_PRELOAD:-1}
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
-      - marker_models:/models
+      - mineru_models:/root/.cache
+    ipc: host                      # vLLM 공유메모리 — 공식 run 의 --ipc=host 대응.
    deploy:
      resources:
        reservations:
@@ -80,11 +86,11 @@ services:
              count: 1
              capabilities: [gpu]
    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:3300/ready"]
+      test: ["CMD", "curl", "-f", "http://localhost:3301/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
-      start_period: 300s
+      start_period: 900s           # VLM 모델 lazy 다운로드(~2.4GB)+엔진 로드 여유.
    restart: unless-stopped

  stt-service:
@@ -149,7 +155,7 @@ services:
      #       → 32 한도 초과 → 413. 64 로 늘림.
      # GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0.
      - MAX_BATCH_TOKENS=16384
-      - MAX_CLIENT_BATCH_SIZE=64
+      - MAX_CLIENT_BATCH_SIZE=256  # 2026-06-18 fix: 64→256, MAX_RERANK_INPUT=200 커버 (batch>64 ERROR=RRF silent fallback 해소; MAX_BATCH_TOKENS가 VRAM 상한이라 entries 증가는 VRAM 무관)
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      - reranker_cache:/data
@@ -168,21 +174,9 @@ services:
      start_period: 120s
    restart: unless-stopped

-  ai-gateway:
-    build: ./gpu-server/services/ai-gateway
-    ports:
-      - "127.0.0.1:8081:8080"
-    environment:
-      - PRIMARY_ENDPOINT=http://100.76.254.116:8801/v1/chat/completions
-      - FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
-      - CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
-      - DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
-    # depends_on: ollama 제거 (2026-06-08) — ollama 서비스가 standalone 으로 이관됨.
-    # FALLBACK_ENDPOINT 의 ollama:11434 는 standalone(동일 hostname, DS 망 부착)으로 해소.
-    restart: unless-stopped
-
  fastapi:
    build: ./app
+    oom_score_adj: -900  # 2026-06-20 tier-0 무장 (앱+스케줄러 SPOF 보호)
    ports:
      - "100.110.63.63:8000:8000"
    volumes:
@@ -197,7 +191,8 @@ services:
        condition: service_healthy
      kordoc-service:
        condition: service_healthy
-      marker-service:
+      # 마크다운 엔진 = mineru-service (marker-service 제거 2026-06-18, 롤백=git history).
+      mineru-service:
        condition: service_healthy
    env_file:
      - credentials.env
@@ -205,7 +200,8 @@ services:
      - DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
      - KORDOC_ENDPOINT=http://kordoc-service:3100
      - OCR_ENDPOINT=http://ocr-service:3200
-      - MARKER_ENDPOINT=http://marker-service:3300
+      # ★ 컷오버 2026-06-18: marker-service:3300 → mineru-service:3301 (동일 /convert 계약).
+      - MARKER_ENDPOINT=http://mineru-service:3301
      - MARKER_CONTAINER_PATH_PREFIX=/documents
      # 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
      - STT_ENDPOINT=http://stt-service:3300
@@ -214,6 +210,14 @@ services:
      # PR-MacMini-Derived-Worker-1
      - STUDY_EXPLANATION_ENABLED=${STUDY_EXPLANATION_ENABLED:-true}
      - INTERNAL_WORKER_TOKEN=${INTERNAL_WORKER_TOKEN}
+      # docsrv-viewer-publish: 발행 워커/저작 enqueue 게이트(기본 false=inert) + 뷰어↔DS feed Bearer.
+      - STUDY_PUBLISH_ENABLED=${STUDY_PUBLISH_ENABLED:-false}
+      - DIGEST_PUBLISH_ENABLED=${DIGEST_PUBLISH_ENABLED:-false}
+      - MAINTENANCE_MODE=${MAINTENANCE_MODE:-false}
+      - MAINTENANCE_NOTE=${MAINTENANCE_NOTE:-}
+      - VIEWER_SYNC_TOKEN=${VIEWER_SYNC_TOKEN:-}
+      # study-to-viewer P2: 뷰어 write-back ingest 게이트(기본 false=inert, 검증 후 점등).
+      - STUDY_INGEST_ENABLED=${STUDY_INGEST_ENABLED:-false}
      # Voice Memo PoC v1 — bot 계정 한정 long-expiry access token. default false → 일반 운영 영향 0.
      # 활성화: host .env 에 VOICE_MEMO_BOT_TOKEN_ENABLED=true. plan: rosy-launching-otter.md
      - VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
@@ -267,7 +271,7 @@ services:
  caddy:
    image: caddy:2
    ports:
-      - "8080:80"
+      - "127.0.0.1:8080:80"  # 2026-06-20: LAN 우회 차단 (실 ingress=home-caddy→caddy:80 도커망)
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
@@ -283,4 +287,4 @@ volumes:
  reranker_cache:
  ocr_models:
  stt_models:
-  marker_models:
+  mineru_models:
@@ -2,7 +2,7 @@
  import { page } from '$app/stores';
  import { goto } from '$app/navigation';
  import { api } from '$lib/api';
-  import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck, MessageCircle } from 'lucide-svelte';
+  import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck, MessageCircle, Hash } from 'lucide-svelte';

  let tree = $state([]);
  let loading = $state(true);
@@ -195,6 +195,13 @@
    >
      <FolderTree size={14} /> 자료실
    </a>
+    <a
+      href="/clause"
+      class="w-full flex items-center gap-2 px-3 py-1.5 rounded-md text-sm transition-colors
+        {$page.url.pathname === '/clause' ? 'bg-accent/15 text-accent' : 'text-dim hover:bg-surface hover:text-text'}"
+    >
+      <Hash size={14} /> 절 바로가기
+    </a>
  </div>

  <!-- 메모 & Inbox -->
@@ -0,0 +1,73 @@
+<script>
+  // 절(clause) 바로가기 — ASME 절 식별자(예: UG-79)로 크로스-doc 위치를 조회해 읽기뷰로 이동 (U-1).
+  // 절은 in_corpus=false(의미검색 비활성)라 일반 검색으론 안 잡히므로 라벨 정확지목 전용 진입점.
+  import { api } from '$lib/api';
+  import { goto } from '$app/navigation';
+
+  let label = $state('');
+  let hits = $state([]);
+  let loading = $state(false);
+  let searched = $state(false);
+  let error = $state('');
+
+  async function lookup() {
+    const q = label.trim();
+    if (!q) return;
+    loading = true;
+    error = '';
+    try {
+      const res = await api(`/documents/clause-lookup?label=${encodeURIComponent(q)}`);
+      hits = res?.hits ?? [];
+      searched = true;
+    } catch (e) {
+      error = '조회에 실패했습니다.';
+      hits = [];
+    } finally {
+      loading = false;
+    }
+  }
+</script>
+
+<div class="mx-auto max-w-3xl px-6 py-10">
+  <h1 class="mb-1 text-2xl font-bold text-base">절 바로가기</h1>
+  <p class="mb-6 text-sm text-dim">
+    ASME 절 식별자(예: <code class="text-accent">UG-79</code>, <code class="text-accent">PG-5</code>)로 문서·위치를 찾아 이동합니다.
+  </p>
+
+  <form onsubmit={(e) => { e.preventDefault(); lookup(); }} class="mb-6 flex gap-2">
+    <input
+      bind:value={label}
+      placeholder="절 식별자 (UG-79, PG-5.6, A-1 …)"
+      autocomplete="off"
+      class="flex-1 rounded-lg border border-default bg-surface px-4 py-2.5 text-base outline-none focus:border-accent"
+    />
+    <button
+      type="submit"
+      disabled={loading || !label.trim()}
+      class="rounded-lg bg-accent px-5 py-2.5 font-medium text-white hover:bg-accent-hover disabled:opacity-50"
+    >
+      {loading ? '조회 중…' : '찾기'}
+    </button>
+  </form>
+
+  {#if error}
+    <p class="text-sm text-accent">{error}</p>
+  {:else if searched && hits.length === 0}
+    <p class="text-sm text-dim">'{label}' 에 해당하는 절을 찾지 못했습니다. (절은 분해된 코드 문서에만 존재합니다)</p>
+  {:else if hits.length > 0}
+    <div class="space-y-2">
+      {#if hits.length > 1}
+        <p class="text-xs text-dim">{hits.length}개 문서에 존재 — 에디션/부록을 선택하세요.</p>
+      {/if}
+      {#each hits as hit (hit.chunk_id)}
+        <button
+          onclick={() => goto(`/documents/${hit.doc_id}?section=${hit.chunk_id}`)}
+          class="block w-full rounded-lg border border-default bg-surface px-4 py-3 text-left transition hover:border-accent hover:bg-surface-hover"
+        >
+          <div class="font-medium text-base">{hit.section_title}</div>
+          <div class="mt-0.5 text-xs text-dim">{hit.doc_title}</div>
+        </button>
+      {/each}
+    </div>
+  {/if}
+</div>
@@ -77,10 +77,14 @@
  let treeGroupIndex = $derived(treeGroups ? groupKeyByChunkId(treeGroups) : null);
  let treeExpanded = $state({}); // key 없으면 접힘(기본 전부 접힘). Svelte5 deep-proxy 반응형.
  function toggleTreeGroup(key) { treeExpanded[key] = !treeExpanded[key]; }
+  // sections 로딩 완료 플래그 — 미완 동안 fallback 풀-문서 뷰어를 띄우면, 곧 절뷰로 교체되며
+  // 풀-문서 이미지가 '살짝 보였다 사라지는' 플래시가 난다(절 보유 문서). 로딩 중엔 skeleton.
+  let sectionsLoaded = $state(false);
  async function loadSections() {
    const reqId = docId;
    try { const r = await api(`/documents/${reqId}/sections`); if (reqId === docId) sections = r?.sections ?? []; }
    catch { if (reqId === docId) sections = []; }
+    finally { if (reqId === docId) sectionsLoaded = true; }
  }

  onMount(async () => {
@@ -126,6 +130,9 @@
  let manageOpen = $state(false);
  // 기본 선택 = 첫 본문 Part 의 첫 절(front-matter TOC 가 아니라 실제 내용으로 진입, front-matter 접힘 유지).
  let defaultSelId = $derived.by(() => {
+    // 딥링크 진입: ?section=<chunk_id> 가 outline 에 있으면 그 절로 (/clause 절 바로가기 → 해당 절 표시).
+    const deep = Number($page.url.searchParams.get('section'));
+    if (deep && outline.some((it) => it.section.chunk_id === deep)) return deep;
    if (treeGroups) {
      const body = treeGroups.find((g) => !g.isFrontMatter);
      if (body && body.items.length) return body.items[0].section.chunk_id;
@@ -149,7 +156,9 @@
    const gk = idx.get(sel);
    if (gk) untrack(() => { treeExpanded[gk] = true; });
  });
-  let selectedItem = $derived(outline.find((it) => it.section.chunk_id === selectedSectionId) ?? outline[0] ?? null);
+  // selectedSectionId 미설정(초기) 시 defaultSelId(첫 본문 Part)로 바로 해석 — outline[0](표지/front-matter)
+  // 를 잠깐 렌더했다 effect 가 defaultSelId 로 바꾸는 절뷰 내부 플래시 차단.
+  let selectedItem = $derived(outline.find((it) => it.section.chunk_id === (selectedSectionId ?? defaultSelId)) ?? outline[0] ?? null);
  let selectedSection = $derived(selectedItem?.section ?? null);
  let selIdx = $derived(outline.findIndex((it) => it.section.chunk_id === selectedItem?.section?.chunk_id));
  // 절 본문 = 청크 원문(it.bodyText, window 조각 합본) 직접 렌더. 과거 char_start 로 md_content 를
@@ -290,8 +299,8 @@
        {/if}
      </div>
    {/if}
-    {#if selectedBodyHtml}
-      <div class="prose prose-base max-w-none text-text">{@html selectedBodyHtml}</div>
+    {#if selectedItem?.bodyText}
+      <MarkdownDoc documentId={doc.id} mdContent={selectedItem.bodyText} mdStatus={null} class="prose prose-base max-w-none text-text" />
    {:else}
      <p style="color:#9aa090;font-size:14px;font-style:italic;">이 절의 본문은 추출되지 않았습니다. 헤더의 '원본'에서 확인하세요.</p>
    {/if}
@@ -390,7 +399,7 @@
    {#if it.bodyText}
      <details class="m-secbody" ontoggle={(e) => { if (e.currentTarget.open) mBodyOpen[s.chunk_id] = true; }}>
        <summary style="cursor:pointer;list-style:none;font-size:12px;color:#697061;padding:5px 0;user-select:none;display:flex;align-items:center;gap:5px;">본문 보기 <span class="m-chev" style="transition:transform .16s;color:#9aa090;">›</span></summary>
-        {#if mBodyOpen[s.chunk_id]}<div class="prose prose-sm max-w-none text-text" style="margin-top:6px;">{@html bodyHtml(it)}</div>{/if}
+        {#if mBodyOpen[s.chunk_id]}<div style="margin-top:6px;"><MarkdownDoc documentId={doc.id} mdContent={it.bodyText} mdStatus={null} class="prose prose-sm max-w-none text-text" /></div>{/if}
      </details>
    {/if}
  </div>
@@ -435,7 +444,10 @@
      </div>
    </div>

-    {#if useSectionView}
+    {#if !sectionsLoaded}
+      <!-- sections 로딩 중: fallback 풀-문서(이미지)→절뷰 교체 플래시 방지용 skeleton -->
+      <Skeleton h="h-96" rounded="card" />
+    {:else if useSectionView}
      <!-- 데스크탑(xl+): 3영역 -->
      <div class="hidden xl:grid" style="grid-template-columns:252px minmax(0,1fr) 336px;gap:13px;align-items:start;">
        <div style="background:#f4f7f1;border:1px solid #dde3d6;border-radius:14px;padding:13px 11px;position:sticky;top:14px;max-height:calc(100vh - 2rem);overflow-y:auto;">{@render treeNav(false)}</div>
@@ -0,0 +1,10 @@
+-- 362: G2 pre-segmentation — 번들 PDF(여러 논리문서 한 파일) → N 자식 문서 분할.
+-- 자식 doc 의 원본 내 page 범위(1-based inclusive) + 분할 역할 표식.
+-- 부모-자식 관계 자체는 document_lineage(relation_type='segmented_from', migration 363).
+-- presegment_role: NULL=일반 단일문서(대다수) / 'parent'=번들원본(자체 extract/embed 안 함) /
+--   'child'=논리 하위문서(부모 file_path 공유 + bundle_page_start/end 범위로 슬라이스).
+-- 단일 ALTER(다중 절) = 1 statement (asyncpg 멀티스테이트먼트 제약 준수).
+ALTER TABLE documents
+  ADD COLUMN IF NOT EXISTS bundle_page_start INTEGER,
+  ADD COLUMN IF NOT EXISTS bundle_page_end   INTEGER,
+  ADD COLUMN IF NOT EXISTS presegment_role   TEXT;
@@ -0,0 +1,8 @@
+-- 363: G2 — document_lineage.relation_type 에 'segmented_from'(번들 → 자식) 추가.
+-- 217 의 column-level CHECK(PG 자동명 document_lineage_relation_type_check, 배포 DB 실측 확인)
+-- 를 교체. DROP + ADD 를 단일 ALTER 의 두 절로 = 1 statement.
+-- 멱등: DROP ... IF EXISTS 라 재실행 안전(이미 교체됐으면 새 제약 DROP 후 동일 재생성).
+ALTER TABLE document_lineage
+  DROP CONSTRAINT IF EXISTS document_lineage_relation_type_check,
+  ADD CONSTRAINT document_lineage_relation_type_check
+    CHECK (relation_type IN ('cited','summarized_from','generated_from','revised_from','segmented_from'));
@@ -0,0 +1,5 @@
+-- 364: G2 — process_stage 큐 스테이지 enum 에 'presegment' 추가 (extract 前 번들 분할 단계).
+-- PG16: ALTER TYPE ADD VALUE 는 트랜잭션 내 실행 가능(값 추가만, 同 트랜잭션 내 사용은 안 함 —
+--   사용은 후속 마이그/런타임). IF NOT EXISTS = 재실행 멱등.
+-- (이 한 줄 단독 파일 — 1 statement.)
+ALTER TYPE process_stage ADD VALUE IF NOT EXISTS 'presegment';
@@ -0,0 +1,56 @@
+-- 스캔 기능: 잡 모델 + 배치 + 에이전트 생존 (plan: scan-feature-build r3)
+-- 웹(fastapi)=intent/명령, 호스트 스캔 에이전트=결과. 싱글톤 스캐너 직렬화.
+-- 주: 러너 규약상 이 파일은 schema_migrations 를 건드리지 않음(스탬프는 외부). BEGIN/COMMIT 없음.
+-- 순서: 테이블 먼저 → 시드 → 인덱스 (인덱스 실패가 테이블 생성 막지 않게).
+
+-- 잡: 한 스캔 세션 = 한 논리 문서 (배치 N개 → 합본 1 PDF → Inbox)
+CREATE TABLE IF NOT EXISTS scan_jobs (
+    id                BIGSERIAL PRIMARY KEY,
+    title             TEXT NOT NULL,                       -- 사람 입력 제목 (commit 시 documents.title 로 전파)
+    settings          JSONB NOT NULL DEFAULT '{}'::jsonb,  -- mode/resolution/source(ADF Duplex) 등 스캔 프로파일
+    status            TEXT NOT NULL DEFAULT 'draft',       -- draft|queued|ready|scanning|assembling|preview|committing|committed|failed|canceled
+    batch_count       INTEGER NOT NULL DEFAULT 0,          -- 스캔 완료 배치 수
+    page_count        INTEGER,                             -- 최종 합본 페이지 수 (assembling 후)
+    last_activity_at  TIMESTAMPTZ,                         -- ready 휴지 벽시계 idle 타임아웃 기준 (방치 데드락 방지)
+    last_progress_at  TIMESTAMPTZ,                         -- 잡 진행 갱신 (에이전트 생존과 분리)
+    staging_path      TEXT,                                -- 호스트 로컬 잡 스테이징 디렉토리
+    nas_staging_path  TEXT,                                -- NAS .scan-staging 합본 경로 (B안 미리보기/commit 소스)
+    inbox_path        TEXT,                                -- 최종 PKM/Inbox 경로 (commit 후)
+    file_hash         CHAR(64),                            -- 합본 sha256 = 정체성/멱등 커밋 키 (commit 시 채움)
+    doc_id            BIGINT REFERENCES documents(id) ON DELETE SET NULL,  -- commit 후 연결 (title 전파)
+    error             TEXT,                                -- failed 사유 (no-silent)
+    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at        TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- 배치: 스캔 1회(ADF 한 묶음) 단위. batch_seq = 결합 순서(글롭 정렬 아님).
+CREATE TABLE IF NOT EXISTS scan_job_batches (
+    id            BIGSERIAL PRIMARY KEY,
+    job_id        BIGINT REFERENCES scan_jobs(id) ON DELETE CASCADE NOT NULL,
+    batch_seq     INTEGER NOT NULL,                    -- 1-based 결합 순서
+    staging_path  TEXT,                                -- 이 배치 PDF (호스트 로컬)
+    page_count    INTEGER,
+    status        TEXT NOT NULL DEFAULT 'scanned',     -- scanned | discarded (잼 폐기 후 재스캔)
+    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (job_id, batch_seq)
+);
+
+-- 에이전트 생존: 싱글톤 1행. 잡 진행(last_progress_at)과 분리 — queued 잡 stale 오탐 방지.
+CREATE TABLE IF NOT EXISTS scan_agent_status (
+    id              INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),  -- 단일 행 강제
+    last_heartbeat  TIMESTAMPTZ,
+    agent_version   TEXT,
+    current_job_id  BIGINT REFERENCES scan_jobs(id) ON DELETE SET NULL,
+    updated_at      TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+INSERT INTO scan_agent_status (id) VALUES (1) ON CONFLICT (id) DO NOTHING;  -- 시드 1행
+
+-- 활성 잡 락: 스캐너 싱글톤 → in-progress 잡은 전체에서 1개만(나머지 queued).
+-- 상수 TRUE 에 unique + in-progress 필터 = 그 상태 행 최대 1개 강제.
+CREATE UNIQUE INDEX IF NOT EXISTS uq_scan_jobs_single_active
+    ON scan_jobs ((TRUE))
+    WHERE status IN ('ready','scanning','assembling','preview','committing');
+
+CREATE INDEX IF NOT EXISTS idx_scan_jobs_queued     ON scan_jobs (created_at) WHERE status = 'queued';
+CREATE INDEX IF NOT EXISTS idx_scan_jobs_file_hash  ON scan_jobs (file_hash)  WHERE file_hash IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_scan_job_batches_job ON scan_job_batches (job_id, batch_seq);
@@ -0,0 +1,4 @@
+-- 스캔 잡 명령 채널 (이중 라이터: API=intent/명령, 에이전트=result) — plan scan-feature-build r3
+-- API/수동이 pending_command 설정 → 에이전트가 조건부 claim(WHERE pending_command=X AND status=기대값) → 실행 → 결과 status write.
+ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS pending_command      TEXT;          -- scan_batch | finish | commit | cancel
+ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS command_requested_at TIMESTAMPTZ;   -- 명령 요청 시각(staleness/디버그)
@@ -0,0 +1,21 @@
+-- 367_published.sql
+-- 발행 레이어(docsrv-viewer-publish) projection 테이블. 뷰어가 read API로 당겨 자기 SQLite로 복제.
+-- kind-discriminated 단일 테이블(study_question | study_explanation | ... 후속 news/document).
+--   pub_id    = opaque+stable(워커가 (kind,source_id)당 1회 부여, republish=rev bump에도 불변) = 뷰어 dedup키=progress키.
+--   source_id = 내부 소스 행 id (pub_id→내부 역매핑, ingest write-back 해소용).
+--   rev       = 발행 워커 커밋순 gapless 커서(pg_advisory_lock 단일 라이터). 뷰어 feed = WHERE rev>since.
+--   payload_hash = sha256(정렬 JSON). (payload_hash, deleted) 디둡 — no-op 재투영 억제, tombstone 보존.
+--   deleted   = tombstone(삭제/만료도 feed 1급 이벤트). schema_version = 엔벨로프 버전(미지원 가시거부).
+CREATE TABLE IF NOT EXISTS published (
+    id             BIGSERIAL    PRIMARY KEY,
+    kind           VARCHAR(40)  NOT NULL,
+    source_id      BIGINT       NOT NULL,
+    pub_id         TEXT         NOT NULL,
+    payload        JSONB        NOT NULL,
+    payload_hash   TEXT         NOT NULL,
+    schema_version SMALLINT     NOT NULL DEFAULT 1,
+    rev            BIGINT       NOT NULL,
+    deleted        BOOLEAN      NOT NULL DEFAULT false,
+    created_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
+    updated_at     TIMESTAMPTZ  NOT NULL DEFAULT now()
+);
@@ -0,0 +1,3 @@
+-- 368_published_kind_pubid_uq.sql
+-- pub_id 는 kind 내 유일(뷰어 dedup/progress 키 무결성, pub_id→내부 역해소 유일성 보장).
+CREATE UNIQUE INDEX IF NOT EXISTS published_kind_pubid_uq ON published (kind, pub_id);
@@ -0,0 +1,3 @@
+-- 369_published_kind_source_uq.sql
+-- (kind, source_id) 당 발행 행 1개 — 발행 워커 upsert 타깃 + pub_id 재사용(같은 source=같은 pub_id) 키.
+CREATE UNIQUE INDEX IF NOT EXISTS published_kind_source_uq ON published (kind, source_id);
@@ -0,0 +1,3 @@
+-- 370_published_rev_idx.sql
+-- 뷰어 pull-sync feed: SELECT ... WHERE rev > :since ORDER BY rev LIMIT :page (P0-2).
+CREATE INDEX IF NOT EXISTS published_rev_idx ON published (rev);
@@ -0,0 +1,15 @@
+-- 371_publish_outbox.sql
+-- transactional outbox — 저작/4-A 트랜잭션이 같은 tx에서 여기 INSERT(P0-1 규율),
+--   단일 발행 워커가 id(커밋순) 순으로 drain 하며 published 에 rev 부여(소스 updated_at 폴링 금지=갭 재발).
+--   processed_at = 워커 drain 시 스탬프(NULL=미처리). payload/hash 는 enqueue 시점 스냅샷.
+CREATE TABLE IF NOT EXISTS publish_outbox (
+    id             BIGSERIAL    PRIMARY KEY,
+    kind           VARCHAR(40)  NOT NULL,
+    source_id      BIGINT       NOT NULL,
+    payload        JSONB        NOT NULL,
+    payload_hash   TEXT         NOT NULL,
+    schema_version SMALLINT     NOT NULL DEFAULT 1,
+    deleted        BOOLEAN      NOT NULL DEFAULT false,
+    created_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
+    processed_at   TIMESTAMPTZ
+);
@@ -0,0 +1,3 @@
+-- 372_publish_outbox_unprocessed_idx.sql
+-- 워커 drain 쿼리: WHERE processed_at IS NULL ORDER BY id (커밋순). 부분 인덱스로 미처리분만.
+CREATE INDEX IF NOT EXISTS publish_outbox_unprocessed_idx ON publish_outbox (id) WHERE processed_at IS NULL;
@@ -0,0 +1,4 @@
+-- 373_quiz_session_finalized_at.sql
+-- 발행 ingest(study-to-viewer P2) finalize 멱등 마커. finalize 성공 후 스탬프 →
+-- 같은 세션 재전송(at-least-once outbox) 시 SR 이중 advance 차단. 라이브 세션은 NULL 유지(무영향).
+ALTER TABLE study_quiz_sessions ADD COLUMN IF NOT EXISTS finalized_at TIMESTAMPTZ;
@@ -0,0 +1,3 @@
+-- 374_quiz_session_client_uuid.sql
+-- 뷰어 로컬 세션 UUID. ingest 가 (uuid, topic) 로 DS 세션 find-or-create = 멱등 키. 라이브=NULL.
+ALTER TABLE study_quiz_sessions ADD COLUMN IF NOT EXISTS client_session_uuid TEXT;
@@ -0,0 +1,3 @@
+-- 375_quiz_session_source.sql
+-- 세션 출처 구분(live | viewer). 감사/필터용. 기존 행=live.
+ALTER TABLE study_quiz_sessions ADD COLUMN IF NOT EXISTS source VARCHAR(20) NOT NULL DEFAULT 'live';
@@ -0,0 +1,3 @@
+-- 376_quiz_session_client_uuid_uq.sql
+-- (client_session_uuid, study_topic_id) 유일 — 뷰어 1세션이 topic 별 1 DS세션. partial(uuid 있는 viewer 행만).
+CREATE UNIQUE INDEX IF NOT EXISTS study_quiz_sessions_client_uuid_topic_uq ON study_quiz_sessions (client_session_uuid, study_topic_id) WHERE client_session_uuid IS NOT NULL;
@@ -289,7 +289,7 @@ async def run(topic_id: int, exam_round: str, apply: bool, abort_threshold: int)
        host="postgres",
        port=5432,
        user="pkm",
-        password="uW38friypljVS0X2ULoMnw",
+        password=os.environ["POSTGRES_PASSWORD"],
        database="pkm",
    )
    try:
@@ -0,0 +1,70 @@
+"""S-4 초기 백필 — 모든 study_memo_card_progress row 를 발행 outbox 에 적재.
+
+★ALL row(필터 없음) — due_at NULL sentinel(암-on-new)·terminal(졸업) 포함. due-only 백필은
+sentinel 누락 → viewer 미확인 오분류. 멱등(워커 (payload_hash, deleted) 디둡). flag on 시 워커 drain.
+
+실행 (GPU 서버):
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_card_progress.py
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_card_progress.py --dry-run
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+# standalone-model-registry-fix: app(라우터 경유 전 모델 import)과 달리 script 는 부분 모델만
+# import → SQLAlchemy mapper string 관계(StudyTopic.sessions->StudySession 등) 해소 실패.
+# 전 모델 모듈 import 로 레지스트리 완성(전부 컨테이너서 import 가능 = app 이 기동 시 로드).
+import importlib as _il, pkgutil as _pu
+import models as _mp
+for _m in _pu.iter_modules(_mp.__path__):
+    _il.import_module("models." + _m.name)
+
+
+from sqlalchemy import func, select
+
+from core.config import settings
+from core.database import async_session
+from models.study_memo_card_progress import StudyMemoCardProgress
+from services.study.publish_enqueue import backfill_publish_card_progress
+
+# 개인 학습툴 progress row 대비 넉넉. 도달 시 가드 경보.
+PAGE = 100000
+
+
+async def run(dry_run: bool) -> None:
+    async with async_session() as session:
+        total = (
+            await session.execute(
+                select(func.count()).select_from(StudyMemoCardProgress)
+            )
+        ).scalar() or 0
+
+    print(f"[info] study_publish_enabled={settings.study_publish_enabled} "
+          f"(False 면 적재는 되나 워커가 drain 안 함)")
+    print(f"[info] card progress row {total}건 (ALL row 발행)")
+    if dry_run:
+        print("[dry-run] 적재 안 함. 실제 실행은 --dry-run 제거.")
+        return
+
+    async with async_session() as session:
+        n = await backfill_publish_card_progress(session, after_id=0, limit=PAGE)
+        await session.commit()
+
+    print(f"\n[ok] outbox 적재 {n}건 — 발행 워커가 drain(flag on 시) 하며 rev 부여.")
+    if n >= PAGE:
+        print(f"[warn] PAGE({PAGE}) 도달 — progress 가 더 있을 수 있음. after_id 페이징 추가 필요.")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="S-4 pub_card_progress 초기 백필")
+    parser.add_argument("--dry-run", action="store_true", default=False)
+    args = parser.parse_args()
+    asyncio.run(run(args.dry_run))
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,75 @@
+"""S-2 초기 백필 — 검수완료(needs_review=False)·미삭제 study_memo_cards 를 발행 outbox 에 적재.
+
+publish_outbox 에만 적재(멱등: 워커 (payload_hash, deleted) 디둡). study_publish_enabled=True
+일 때 발행 워커가 drain → published(kind=study_card) rev 부여 → viewer pull-sync.
+
+실행 (GPU 서버):
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_cards.py
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_cards.py --dry-run
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+# standalone-model-registry-fix: app(라우터 경유 전 모델 import)과 달리 script 는 부분 모델만
+# import → SQLAlchemy mapper string 관계(StudyTopic.sessions->StudySession 등) 해소 실패.
+# 전 모델 모듈 import 로 레지스트리 완성(전부 컨테이너서 import 가능 = app 이 기동 시 로드).
+import importlib as _il, pkgutil as _pu
+import models as _mp
+for _m in _pu.iter_modules(_mp.__path__):
+    _il.import_module("models." + _m.name)
+
+
+from sqlalchemy import func, select
+
+from core.config import settings
+from core.database import async_session
+from models.study_memo_card import StudyMemoCard
+from services.study.publish_enqueue import backfill_publish_cards
+
+# 개인 학습툴 카드 수 대비 넉넉(단일 outbox 적재 tx, 워커는 BATCH_SIZE 로 drain). 도달 시 가드 경보.
+PAGE = 100000
+
+
+async def run(dry_run: bool) -> None:
+    async with async_session() as session:
+        active = (
+            await session.execute(
+                select(func.count())
+                .select_from(StudyMemoCard)
+                .where(
+                    StudyMemoCard.deleted_at.is_(None),
+                    StudyMemoCard.needs_review.is_(False),
+                )
+            )
+        ).scalar() or 0
+
+    print(f"[info] study_publish_enabled={settings.study_publish_enabled} "
+          f"(False 면 적재는 되나 워커가 drain 안 함)")
+    print(f"[info] 검수완료·미삭제 카드 {active}건")
+    if dry_run:
+        print("[dry-run] 적재 안 함. 실제 실행은 --dry-run 제거.")
+        return
+
+    async with async_session() as session:
+        n = await backfill_publish_cards(session, after_id=0, limit=PAGE)
+        await session.commit()
+
+    print(f"\n[ok] outbox 적재 {n}건 — 발행 워커가 drain(flag on 시) 하며 rev 부여.")
+    if n >= PAGE:
+        print(f"[warn] PAGE({PAGE}) 도달 — 카드가 더 있을 수 있음. after_id 페이징 추가 필요.")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="S-2 pub_card 초기 백필")
+    parser.add_argument("--dry-run", action="store_true", default=False)
+    args = parser.parse_args()
+    asyncio.run(run(args.dry_run))
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,78 @@
+"""S-1 초기 백필 — 기존 active study_topics 를 발행 outbox 에 1회 적재.
+
+publish_outbox 에만 적재한다(멱등: 발행 워커의 (payload_hash, deleted) 디둡이
+중복 enqueue 를 no-op 으로 흡수). study_publish_enabled=True 일 때 발행 워커가
+1분 주기로 drain → published 에 rev 부여 → viewer pull-sync.
+
+주제 수는 개인 학습툴이라 소량 — bounded page 사실상 1페이지지만 PAGE 도달 시
+overflow 가드로 페이징 누락을 경보(silent truncation 금지).
+
+실행 (GPU 서버):
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_topics.py
+  # dry-run(적재 없이 카운트만):
+  docker exec hyungi_document_server-fastapi-1 python /app/scripts/backfill_publish_topics.py --dry-run
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+# fastapi 컨테이너 WORKDIR=/app — `from models...` import 가능하게 path 추가.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+# standalone-model-registry-fix: app(라우터 경유 전 모델 import)과 달리 script 는 부분 모델만
+# import → SQLAlchemy mapper string 관계(StudyTopic.sessions->StudySession 등) 해소 실패.
+# 전 모델 모듈 import 로 레지스트리 완성(전부 컨테이너서 import 가능 = app 이 기동 시 로드).
+import importlib as _il, pkgutil as _pu
+import models as _mp
+for _m in _pu.iter_modules(_mp.__path__):
+    _il.import_module("models." + _m.name)
+
+
+from sqlalchemy import func, select
+
+from core.config import settings
+from core.database import async_session
+from models.study_topic import StudyTopic
+from services.study.publish_enqueue import backfill_publish_topics
+
+# 개인 학습툴 주제 수 대비 넉넉. 도달 시 overflow 가드가 경보.
+PAGE = 5000
+
+
+async def run(dry_run: bool) -> None:
+    async with async_session() as session:
+        active = (
+            await session.execute(
+                select(func.count())
+                .select_from(StudyTopic)
+                .where(StudyTopic.deleted_at.is_(None))
+            )
+        ).scalar() or 0
+
+    print(f"[info] study_publish_enabled={settings.study_publish_enabled} "
+          f"(False 면 적재는 되나 워커가 drain 안 함)")
+    print(f"[info] active 주제 {active}건")
+    if dry_run:
+        print("[dry-run] 적재 안 함. 실제 실행은 --dry-run 제거.")
+        return
+
+    async with async_session() as session:
+        n = await backfill_publish_topics(session, after_id=0, limit=PAGE)
+        await session.commit()
+
+    print(f"\n[ok] outbox 적재 {n}건 — 발행 워커가 drain(flag on 시) 하며 rev 부여.")
+    if n >= PAGE:
+        print(f"[warn] PAGE({PAGE}) 도달 — 주제가 더 있을 수 있음. after_id 페이징 추가 필요.")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="S-1 pub_topics 초기 백필")
+    parser.add_argument("--dry-run", action="store_true", default=False)
+    args = parser.parse_args()
+    asyncio.run(run(args.dry_run))
+
+
+if __name__ == "__main__":
+    main()
@@ -16,6 +16,7 @@ dry-run 먼저 출력 (각 필드 N건). 그 다음 --apply 옵션으로 UPDATE.
 from __future__ import annotations

 import asyncio
+import os
 import re
 import sys

@@ -99,7 +100,7 @@ async def main() -> None:
        host="postgres",
        port=5432,
        user="pkm",
-        password="uW38friypljVS0X2ULoMnw",
+        password=os.environ["POSTGRES_PASSWORD"],
        database="pkm",
    )
    try:
@@ -1,22 +0,0 @@
-FROM python:3.12-slim
-
-WORKDIR /app
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgl1 libglib2.0-0 curl \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir \
-    --extra-index-url https://download.pytorch.org/whl/cu126 \
-    -r requirements.txt
-
-# 모델 미다운로드 (HF cache volume → 첫 호출/warmup 시 적재).
-
-COPY server.py .
-
-EXPOSE 3300
-HEALTHCHECK --start-period=300s --interval=30s --timeout=10s --retries=3 \
-    CMD curl -f http://localhost:3300/ready || exit 1
-
-CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "3300"]
@@ -1,9 +0,0 @@
-torch==2.11.0+cu126
-torchvision==0.26.0+cu126
-transformers==4.57.6
-surya-ocr==0.17.1
-marker-pdf==1.10.2
-pymupdf>=1.24.0,<2.0.0
-fastapi>=0.110.0,<1.0.0
-uvicorn[standard]>=0.27.0,<1.0.0
-pillow>=10.0.0,<12.0.0
@@ -1,325 +0,0 @@
-"""marker-service — POST /convert: PDF → markdown + 추출 이미지 base64.
-
-Phase 1B   (2026-05-01) — 텍스트만 응답, 이미지 폐기.
-Phase 1B.5             — `_images` 직렬화해서 base64 응답에 포함. NAS write 권한이
-                         없는 stateless 변환기 유지 (fastapi 가 NAS persist 담당).
-D-1 (plan crawl-24x7-1, 2026-06-10) — idle-unload 운영 전환:
-  MARKER_PRELOAD=0           : startup warmup 끔 (첫 /convert 시 lazy load)
-  MARKER_IDLE_UNLOAD_MINUTES : N분 유휴 시 모델 해제 (0=비활성, 기존 동작)
-  /ready 는 idle(미적재)에서도 200 — fastapi 의 depends_on service_healthy 가
-  lazy 모드에서 영구 미기동으로 굳는 것 방지. 503 은 warmup_failed 한정.
-
-plan: ~/.claude/plans/piped-humming-crystal.md
-"""
-import base64
-import gc
-import hashlib
-import io
-import logging
-import os
-import threading
-import time
-from pathlib import Path
-
-from fastapi import FastAPI, HTTPException, Response
-from pydantic import BaseModel, Field
-
-from marker.converters.pdf import PdfConverter
-from marker.models import create_model_dict
-from marker.output import text_from_rendered
-import marker as marker_module
-
-logger = logging.getLogger(__name__)
-app = FastAPI()
-
-os.environ.setdefault("HF_HOME", "/models/huggingface")
-os.environ.setdefault("TORCH_HOME", "/models/torch")
-
-_models = None
-_converter = None
-try:
-    import importlib.metadata
-    _engine_version = importlib.metadata.version("marker-pdf")
-except Exception:
-    _engine_version = "unknown"
-_warmup_done = False
-_warmup_error: str | None = None
-_warmup_lock = threading.Lock()
-
-# D-1 idle-unload 상태 — 전이는 전부 _warmup_lock 아래
-_PRELOAD = os.getenv("MARKER_PRELOAD", "1") != "0"
-_IDLE_UNLOAD_MINUTES = int(os.getenv("MARKER_IDLE_UNLOAD_MINUTES", "0"))
-_inflight = 0
-_last_used = time.monotonic()
-
-# 이미지 응답 cap. base64 응답 크기 폭주 방지. 사용자 PDF 풀 측정 (Phase 1D) 시
-# 가장 이미지 많은 문서가 ~30건 수준 → 200 은 안전 마진. 초과 시 truncate flag 응답.
-MAX_IMAGES_PER_DOC = int(os.getenv("MARKER_MAX_IMAGES_PER_DOC", "200"))
-# per-image 최대 raw bytes (base64 전). 그래픽이 많은 풀페이지 스캔 회피.
-MAX_BYTES_PER_IMAGE = int(os.getenv("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024)))
-
-
-def _ensure_warmup() -> None:
-    """첫 /convert 또는 startup hook 시 모델 로드. HF cache volume 활용."""
-    global _models, _converter, _warmup_done, _warmup_error
-    if _warmup_done:
-        return
-    with _warmup_lock:
-        if _warmup_done:
-            return
-        try:
-            logger.info("[marker-service] warmup start")
-            _models = create_model_dict()
-            _converter = PdfConverter(artifact_dict=_models)
-            _warmup_done = True
-            _warmup_error = None
-            logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
-        except Exception as exc:
-            _warmup_error = f"{type(exc).__name__}: {exc}"
-            logger.exception("[marker-service] warmup failed")
-            raise
-
-
-def _acquire_models():
-    """warmup 보장 + inflight 진입을 원자적으로 — ensure 직후 reaper 가 해제하는 경합 차단."""
-    global _inflight
-    while True:
-        _ensure_warmup()
-        with _warmup_lock:
-            if _warmup_done:
-                _inflight += 1
-                return
-        # ensure 와 lock 재진입 사이에 unload 가 끼어든 희귀 경합 — 재시도
-
-
-def _release_models():
-    global _inflight, _last_used
-    with _warmup_lock:
-        _inflight -= 1
-        _last_used = time.monotonic()
-
-
-def _maybe_unload() -> None:
-    """유휴 시 모델 해제. 변환 중(inflight>0)이면 절대 해제하지 않는다.
-
-    split 변환의 배치 사이 간격은 초 단위 — N>=1분 임계면 배치 사이 해제 없음.
-    """
-    global _models, _converter, _warmup_done
-    with _warmup_lock:
-        if not _warmup_done or _inflight > 0:
-            return
-        if time.monotonic() - _last_used < _IDLE_UNLOAD_MINUTES * 60:
-            return
-        _models = None
-        _converter = None
-        _warmup_done = False
-    gc.collect()
-    try:
-        import torch
-        torch.cuda.empty_cache()
-    except Exception:
-        pass
-    logger.info(f"[marker-service] idle-unload: 모델 해제 (유휴 {_IDLE_UNLOAD_MINUTES}분 초과)")
-
-
-async def _idle_reaper():
-    import asyncio
-    while True:
-        await asyncio.sleep(60)
-        try:
-            _maybe_unload()
-        except Exception:
-            logger.exception("[marker-service] idle reaper 오류")
-
-
-@app.on_event("startup")
-async def startup():
-    """startup hook — warmup 은 MARKER_PRELOAD 게이트 (D-1: lazy 기본 전환은 compose 가)."""
-    import asyncio
-    if _PRELOAD:
-        asyncio.create_task(asyncio.to_thread(_ensure_warmup))
-    if _IDLE_UNLOAD_MINUTES > 0:
-        asyncio.create_task(_idle_reaper())
-        logger.info(f"[marker-service] idle-unload 활성: {_IDLE_UNLOAD_MINUTES}분")
-
-
-class ConvertRequest(BaseModel):
-    file_path: str
-    max_pages: int | None = None
-    # page range (1-based inclusive) — LargeDoc split 변환용. marker 내부 0-based 변환은
-    # convert() 에 격리 (page numbering invariant: DB/API=1-based, marker=0-based).
-    start_page: int | None = None
-    end_page: int | None = None
-
-
-class ConvertImage(BaseModel):
-    """marker 추출 이미지 1건. fastapi 가 NAS 에 쓰고 docimg:img_NNN 으로 ref 정규화."""
-    slug: str           # marker 원본 slug (예: '_page_0_Picture_3.jpeg')
-    format: str         # 'png' | 'jpeg' | 'webp' | 'gif'
-    width: int | None = None
-    height: int | None = None
-    bytes_b64: str      # base64-encoded raw bytes
-
-
-class ConvertResponse(BaseModel):
-    md_content: str
-    md_content_hash: str
-    engine: str
-    engine_version: str
-    elapsed_ms: int
-    raw_metrics: dict
-    images: list[ConvertImage] = Field(default_factory=list)
-    images_truncated: bool = False
-
-
-@app.get("/health")
-def health():
-    return {"status": "ok", "service": "marker-service"}
-
-
-@app.get("/ready")
-async def ready(response: Response):
-    """Round 4 #1+#2: Response.status_code 명시 + warmup_error 노출.
-
-    D-1: idle(미적재) = 200. 503 은 warmup_failed 한정 — lazy 모드에서 fastapi
-    depends_on service_healthy 가 영구 미기동으로 굳지 않게. 배포 검증에서
-    'status=ready' 단언하던 runbook 은 강제 warm 호출(/convert 1건)로 대체.
-    """
-    if _warmup_error:
-        response.status_code = 503
-        return {
-            "status": "warmup_failed",
-            "engine": "marker",
-            "engine_version": _engine_version,
-            "error": _warmup_error,
-        }
-    if not _warmup_done:
-        return {
-            "status": "warming_up" if _PRELOAD else "idle",
-            "engine": "marker",
-            "engine_version": _engine_version,
-            "models_loaded": False,
-            "idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
-        }
-    return {
-        "status": "ready",
-        "engine": "marker",
-        "engine_version": _engine_version,
-        "models_loaded": True,
-        "inflight": _inflight,
-        "idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
-    }
-
-
-@app.post("/convert", response_model=ConvertResponse)
-async def convert(req: ConvertRequest):
-    p = Path(req.file_path)
-    if not p.is_file():
-        raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
-    if req.start_page is not None and req.end_page is not None:
-        if req.start_page < 1 or req.end_page < req.start_page:
-            raise HTTPException(
-                422,
-                detail={
-                    "code": "bad_page_range",
-                    "message": f"start_page={req.start_page} end_page={req.end_page}",
-                },
-            )
-
-    # D-1: warmup 보장 + inflight 진입 원자화 — 변환 중 reaper 해제 차단. 해제는 finally.
-    _acquire_models()
-    try:
-        start = time.monotonic()
-        # page range 지정 시 per-request converter (모델 _models 재사용 → reload 없음).
-        # invariant: req.start_page/end_page = 1-based inclusive → marker 0-based 로 변환.
-        converter = _converter
-        if req.start_page is not None and req.end_page is not None:
-            page_range = list(range(req.start_page - 1, req.end_page))  # 0-based inclusive
-            converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range})
-        try:
-            rendered = converter(str(p))
-        except Exception as exc:
-            logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
-            raise HTTPException(
-                status_code=422,
-                detail={
-                    "code": "conversion_failed",
-                    "message": f"{type(exc).__name__}: {exc}",
-                },
-            ) from exc
-
-        md_text, _meta, raw_images = text_from_rendered(rendered)
-        elapsed_ms = int((time.monotonic() - start) * 1000)
-    finally:
-        _release_models()
-
-    images_payload, truncated = _serialize_images(raw_images, str(p))
-
-    return ConvertResponse(
-        md_content=md_text,
-        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
-        engine="marker",
-        engine_version=_engine_version,
-        elapsed_ms=elapsed_ms,
-        raw_metrics={
-            "page_count": getattr(rendered, "page_count", None),
-            "image_count_extracted": len(raw_images) if raw_images else 0,
-            "image_count_returned": len(images_payload),
-        },
-        images=images_payload,
-        images_truncated=truncated,
-    )
-
-
-def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]:
-    """marker 의 `_images` (dict[slug, PIL.Image]) → base64 ConvertImage 리스트.
-
-    가드:
-      - MAX_IMAGES_PER_DOC 초과 시 head 만 반환 + truncated=True
-      - per-image 직렬화 실패 시 해당 이미지만 skip + warn (전체 fail 안 함)
-      - per-image 결과 byte 크기가 MAX_BYTES_PER_IMAGE 초과 시 skip + warn
-    """
-    if not raw_images:
-        return [], False
-
-    items = list(raw_images.items())
-    truncated = len(items) > MAX_IMAGES_PER_DOC
-    if truncated:
-        logger.warning(
-            f"[marker-service] images truncated path={src_path} "
-            f"total={len(items)} cap={MAX_IMAGES_PER_DOC}"
-        )
-        items = items[:MAX_IMAGES_PER_DOC]
-
-    out: list[ConvertImage] = []
-    for slug, pil_img in items:
-        try:
-            fmt_raw = (pil_img.format or "PNG").upper()
-            # WebP/GIF 도 marker 가 emit 가능하지만 본 1B.5 기준은 PNG/JPEG 우선.
-            # 알 수 없는 포맷이면 PNG 로 강제 (lossless re-encode).
-            fmt = fmt_raw if fmt_raw in {"PNG", "JPEG", "WEBP", "GIF"} else "PNG"
-            buf = io.BytesIO()
-            pil_img.save(buf, format=fmt)
-            raw_bytes = buf.getvalue()
-            if len(raw_bytes) > MAX_BYTES_PER_IMAGE:
-                logger.warning(
-                    f"[marker-service] image too large skipped path={src_path} "
-                    f"slug={slug} bytes={len(raw_bytes)} cap={MAX_BYTES_PER_IMAGE}"
-                )
-                continue
-            out.append(
-                ConvertImage(
-                    slug=slug,
-                    format=fmt.lower(),
-                    width=pil_img.width,
-                    height=pil_img.height,
-                    bytes_b64=base64.b64encode(raw_bytes).decode("ascii"),
-                )
-            )
-        except Exception as exc:
-            logger.warning(
-                f"[marker-service] image serialize failed path={src_path} "
-                f"slug={slug}: {type(exc).__name__}: {exc}"
-            )
-            continue
-    return out, truncated
@@ -0,0 +1,45 @@
+# mineru-service — MinerU 2.5 VLM 기반 PDF→markdown 추출기. marker-service 대체.
+# 단일카드(RTX 4070 Ti S 16GB→PRO 4000 24GB) markdown VRAM ~10GB(marker)→~5GB(MinerU VLM).
+#
+# 공식 opendatalab/MinerU global Dockerfile 기반:
+#   FROM vllm/vllm-openai:v0.21.0 (CUDA 13.0). GPU 호스트 드라이버 595.71.05 / CUDA 13.2 가
+#   13.0 런타임 지원 → cu129 폴백 불필요. vLLM 은 base 이미지가 제공하므로 mineru 는 [core] 만.
+#
+# 모델은 이미지에 굽지 않고 런타임 warmup 시 HF cache 볼륨으로 lazy 다운로드 (marker/ocr 선례 =
+#   서버 .cache 볼륨). 이미지 슬림 유지 + server.py 반복 빌드 빠름 + 모델 볼륨 영속.
+FROM vllm/vllm-openai:v0.21.0
+
+# base 이미지의 ENTRYPOINT(vLLM OpenAI 서버)를 제거 — 우리는 uvicorn 으로 자체 FastAPI 기동.
+ENTRYPOINT []
+
+# opencv(libgl) + CJK 폰트(레이아웃/렌더 안전) + curl(healthcheck). 공식 Dockerfile 동일.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        fonts-noto-core fonts-noto-cjk fontconfig libgl1 curl \
+    && fc-cache -fv \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# mineru[core] — 공식 설치 라인. vLLM(vlm-engine 백엔드)은 base 가 이미 제공.
+RUN python3 -m pip install -U 'mineru[core]>=3.2.1' --break-system-packages \
+    && python3 -m pip cache purge
+
+# 서비스 wrapper 의존성. base(vllm-openai)+mineru 가 fastapi/uvicorn/pillow 를 이미 제공 →
+# pymupdf 만 추가(나머지 명시 핀은 base 의 pillow 12.x 를 불필요하게 다운그레이드해서 제거).
+RUN python3 -m pip install --no-cache-dir --break-system-packages \
+        'pymupdf>=1.24.0,<2.0.0'
+
+# MINERU_MODEL_SOURCE=huggingface = warmup 시 lazy 다운로드 (HF cache 볼륨에 영속).
+# PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True = 단편화 완화(연구 권고, 거대 입력 OOM 완충).
+ENV MINERU_MODEL_SOURCE=huggingface \
+    HF_HOME=/root/.cache/huggingface \
+    PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+WORKDIR /app
+# server.py = 무거운 pip 레이어 뒤에 COPY → 반복 빌드 시 캐시 적중(빠른 재빌드).
+COPY server.py /app/server.py
+
+EXPOSE 3301
+# VLM 모델 lazy 다운로드(~2.4GB)+엔진 로드 여유로 start-period 길게.
+HEALTHCHECK --start-period=900s --interval=30s --timeout=10s --retries=3 \
+    CMD curl -f http://localhost:3301/ready || exit 1
+
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "3301"]
@@ -0,0 +1,315 @@
+"""mineru-service — POST /convert: PDF → markdown + 추출 이미지 base64.
+
+marker-service 대체(MinerU 2.5 VLM). **marker 의 /convert 계약을 그대로 복제**해서
+marker_worker 가 엔드포인트만 바꾸면 되도록 한다(요청/응답 동일 shape):
+
+  요청:  {file_path, max_pages?, start_page?, end_page?}   (page = 1-based inclusive)
+  응답:  {md_content, md_content_hash, engine, engine_version, elapsed_ms,
+          raw_metrics, images:[{slug, format, width, height, bytes_b64}], images_truncated}
+
+설계 노트:
+- **page range 는 PyMuPDF 로 직접 슬라이스**해서 MinerU 에 넘긴다(start_page..end_page →
+  0-based [a,b] 페이지만 담은 새 PDF bytes). MinerU 의 `end_page_id=0 falsy 무시` 버그 회피.
+  40p 윈도우 분할은 marker_worker 가 그대로 담당. (검증: fitz 슬라이스 렌더 = 원본과 동일 품질.)
+- **★ 반드시 async 엔진(`aio_do_parse`) 사용.** 동기 `do_parse`(vllm-engine sync)는 본 모델
+  (MinerU2.5-Pro-2605-1.2B)에서 layout 토큰 malformed → 빈 md 산출(실측 G1-2). async
+  (`aio_do_parse` = vllm-async-engine, mineru CLI 가 쓰는 정상 경로) = 정상 출력.
+- **이미지 = stateless**: marker 처럼 NAS write 안 함. MinerU 가 md 에 박는 `![](images/<sha>.jpg)`
+  href 를 그대로 slug 으로 반환 → fastapi(marker_worker)의 `_rewrite_image_refs` 가 basename
+  매칭으로 `docimg:img_NNN` 정규화 + NAS persist. (계약 무변)
+- **VRAM 캡**: `MINERU_GPU_MEMORY_UTILIZATION`(vLLM 분율, 0.40→~6GB 실측). compose 의
+  `MINERU_VIRTUAL_VRAM_SIZE` 도 무해(실측 정상)하나 출력엔 무관 — 캡은 분율로 충분.
+  backend=`vlm-engine`(기본 hybrid-engine 은 다중모델 로드 OOM, 반드시 명시).
+
+엔진은 첫 변환(또는 startup warmup) 시 1회 로드 — MinerU ModelSingleton 캐시. 단일 GPU 라
+변환은 _engine_lock 으로 직렬화.
+"""
+import asyncio
+import base64
+import hashlib
+import inspect
+import io
+import logging
+import os
+import time
+import unicodedata
+from pathlib import Path
+
+import fitz  # PyMuPDF — page 슬라이스 + 페이지수
+from fastapi import FastAPI, HTTPException, Response
+from PIL import Image
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger("mineru-service")
+logging.basicConfig(level=logging.INFO)
+app = FastAPI()
+
+try:
+    import importlib.metadata
+    _engine_version = importlib.metadata.version("mineru")
+except Exception:
+    _engine_version = "unknown"
+
+# ---- 설정 (compose env 로 override) -----------------------------------------
+MINERU_BACKEND = os.getenv("MINERU_BACKEND", "vlm-engine")
+MINERU_LANG = os.getenv("MINERU_LANG", "korean")
+GPU_MEM_UTIL = float(os.getenv("MINERU_GPU_MEMORY_UTILIZATION", "0.40"))
+
+MAX_IMAGES_PER_DOC = int(os.getenv("MINERU_MAX_IMAGES_PER_DOC", "200"))
+MAX_BYTES_PER_IMAGE = int(os.getenv("MINERU_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024)))
+MAX_PAGES_HARD = int(os.getenv("MINERU_MAX_PAGES_HARD", "200"))  # 1-shot max_pages 안전장치
+
+_PRELOAD = os.getenv("MINERU_PRELOAD", "1") != "0"
+
+# ---- 엔진 상태 ---------------------------------------------------------------
+_warmup_done = False
+_warmup_error: str | None = None
+# 단일 GPU async 엔진 — warmup + convert 직렬화(엔진 1개, 임시디렉토리/싱글톤 경합 차단).
+_engine_lock = asyncio.Lock()
+
+
+async def _run_mineru(pdf_bytes: bytes, lang: str) -> tuple[str, list[dict]]:
+    """슬라이스된 PDF bytes → (markdown, 이미지 dict 리스트). **async 엔진 경로.**
+
+    호출자(_ensure_warmup / convert)가 _engine_lock 을 잡은 상태로 호출한다.
+    이미지 dict: {slug, format, width, height, raw_bytes}. slug = md href 그대로.
+    """
+    import glob
+    import tempfile
+
+    from mineru.cli.common import aio_do_parse
+
+    with tempfile.TemporaryDirectory(prefix="mineru_") as td:
+        candidate = {
+            "output_dir": td,
+            "pdf_file_names": ["doc"],
+            "pdf_bytes_list": [pdf_bytes],
+            "p_lang_list": [lang],
+            "backend": MINERU_BACKEND,
+            "formula_enable": True,
+            "table_enable": True,
+            "f_dump_md": True,
+            "f_dump_content_list": True,
+            "f_dump_middle_json": False,
+            "f_dump_model_output": False,
+            "f_dump_orig_pdf": False,
+            "f_draw_layout_bbox": False,
+            "f_draw_span_bbox": False,
+            "gpu_memory_utilization": GPU_MEM_UTIL,
+        }
+        sig = inspect.signature(aio_do_parse)
+        has_var_kw = any(
+            p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
+        )
+        kwargs = candidate if has_var_kw else {
+            k: v for k, v in candidate.items() if k in sig.parameters
+        }
+        await aio_do_parse(**kwargs)
+
+        md_files = sorted(glob.glob(f"{td}/**/*.md", recursive=True))
+        if not md_files:
+            raise RuntimeError("mineru produced no markdown output")
+        md_path = Path(md_files[0])
+        md_text = md_path.read_text(encoding="utf-8", errors="replace")
+
+        images: list[dict] = []
+        img_dir = md_path.parent / "images"
+        if img_dir.is_dir():
+            for img_file in sorted(img_dir.iterdir()):
+                if not img_file.is_file():
+                    continue
+                raw = img_file.read_bytes()
+                slug = f"images/{img_file.name}"  # md href 와 정확히 일치
+                w = h = None
+                try:
+                    with Image.open(io.BytesIO(raw)) as im:
+                        w, h = im.width, im.height
+                        fmt = (im.format or "JPEG").lower()
+                except Exception:
+                    fmt = img_file.suffix.lstrip(".").lower() or "jpeg"
+                images.append(
+                    {"slug": slug, "format": fmt, "width": w, "height": h, "raw_bytes": raw}
+                )
+        return md_text, images
+
+
+async def _ensure_warmup() -> None:
+    """첫 /convert 또는 startup hook 시 1-page 합성 PDF 로 엔진+모델 적재."""
+    global _warmup_done, _warmup_error
+    if _warmup_done:
+        return
+    async with _engine_lock:
+        if _warmup_done:
+            return
+        try:
+            logger.info("[mineru-service] warmup start (async engine load + model fetch)")
+            doc = fitz.open()
+            page = doc.new_page()
+            page.insert_text((72, 72), "MinerU warmup.")
+            warmup_bytes = doc.tobytes()
+            doc.close()
+            await _run_mineru(warmup_bytes, MINERU_LANG)
+            _warmup_done = True
+            _warmup_error = None
+            logger.info(f"[mineru-service] warmup done engine_version={_engine_version}")
+        except Exception as exc:
+            _warmup_error = f"{type(exc).__name__}: {exc}"
+            logger.exception("[mineru-service] warmup failed")
+            raise
+
+
+@app.on_event("startup")
+async def startup():
+    if _PRELOAD:
+        asyncio.create_task(_ensure_warmup())
+
+
+# ---- 계약 모델 (marker 와 동일 shape) ----------------------------------------
+class ConvertRequest(BaseModel):
+    file_path: str
+    max_pages: int | None = None
+    start_page: int | None = None   # 1-based inclusive
+    end_page: int | None = None     # 1-based inclusive
+
+
+class ConvertImage(BaseModel):
+    slug: str
+    format: str
+    width: int | None = None
+    height: int | None = None
+    bytes_b64: str
+
+
+class ConvertResponse(BaseModel):
+    md_content: str
+    md_content_hash: str
+    engine: str
+    engine_version: str
+    elapsed_ms: int
+    raw_metrics: dict
+    images: list[ConvertImage] = Field(default_factory=list)
+    images_truncated: bool = False
+
+
+@app.get("/health")
+def health():
+    return {"status": "ok", "service": "mineru-service"}
+
+
+@app.get("/ready")
+async def ready(response: Response):
+    """marker /ready 의미 복제: warmup_failed 만 503, idle/warming=200(depends_on 굳음 방지)."""
+    if _warmup_error:
+        response.status_code = 503
+        return {"status": "warmup_failed", "engine": "mineru",
+                "engine_version": _engine_version, "error": _warmup_error}
+    if not _warmup_done:
+        return {"status": "warming_up" if _PRELOAD else "idle", "engine": "mineru",
+                "engine_version": _engine_version, "models_loaded": False}
+    return {"status": "ready", "engine": "mineru",
+            "engine_version": _engine_version, "models_loaded": True}
+
+
+def _resolve_path(file_path: str) -> Path | None:
+    """NFC(DB) vs NFD(NFS) 한글 경로 정규화 차이 흡수. ocr/server.py 와 동일 패턴
+    (필수 — 한글명 파일은 NFS=NFD 저장이라 DB 의 NFC 경로로는 is_file=False)."""
+    for c in (file_path,
+              unicodedata.normalize("NFD", file_path),
+              unicodedata.normalize("NFC", file_path)):
+        p = Path(c)
+        if p.exists():
+            return p
+    parent = Path(file_path).parent
+    if parent.exists():
+        target = unicodedata.normalize("NFC", Path(file_path).name)
+        for child in parent.iterdir():
+            if unicodedata.normalize("NFC", child.name) == target:
+                return child
+    return None
+
+
+def _slice_pdf(src_path: Path, start_page: int | None, end_page: int | None,
+               max_pages: int | None) -> tuple[bytes, int]:
+    """요청 page 범위(1-based inclusive)만 담은 새 PDF bytes + 변환 페이지수 반환."""
+    with fitz.open(src_path) as src:
+        n = src.page_count
+        if start_page is not None and end_page is not None:
+            a = max(0, start_page - 1)
+            b = min(n - 1, end_page - 1)
+        else:
+            a = 0
+            cap = max_pages if max_pages is not None else MAX_PAGES_HARD
+            b = min(n - 1, cap - 1)
+        if b < a:
+            raise HTTPException(422, detail={"code": "bad_page_range",
+                                             "message": f"a={a} b={b} n={n}"})
+        out = fitz.open()
+        out.insert_pdf(src, from_page=a, to_page=b)
+        pdf_bytes = out.tobytes()
+        out.close()
+        return pdf_bytes, (b - a + 1)
+
+
+def _serialize_images(images: list[dict], src_path: str) -> tuple[list[ConvertImage], bool]:
+    """이미지 dict 리스트 → base64 ConvertImage 리스트 (marker 가드 동일)."""
+    truncated = len(images) > MAX_IMAGES_PER_DOC
+    if truncated:
+        logger.warning(f"[mineru-service] images truncated path={src_path} "
+                       f"total={len(images)} cap={MAX_IMAGES_PER_DOC}")
+        images = images[:MAX_IMAGES_PER_DOC]
+    out: list[ConvertImage] = []
+    for img in images:
+        raw = img["raw_bytes"]
+        if len(raw) > MAX_BYTES_PER_IMAGE:
+            logger.warning(f"[mineru-service] image too large skipped path={src_path} "
+                           f"slug={img['slug']} bytes={len(raw)} cap={MAX_BYTES_PER_IMAGE}")
+            continue
+        out.append(ConvertImage(
+            slug=img["slug"], format=img["format"],
+            width=img.get("width"), height=img.get("height"),
+            bytes_b64=base64.b64encode(raw).decode("ascii"),
+        ))
+    return out, truncated
+
+
+@app.post("/convert", response_model=ConvertResponse)
+async def convert(req: ConvertRequest):
+    p = _resolve_path(req.file_path)
+    if p is None or not p.is_file():
+        raise HTTPException(404, detail={"code": "file_not_found", "message": req.file_path})
+    if req.start_page is not None and req.end_page is not None:
+        if req.start_page < 1 or req.end_page < req.start_page:
+            raise HTTPException(422, detail={"code": "bad_page_range",
+                "message": f"start_page={req.start_page} end_page={req.end_page}"})
+
+    pdf_bytes, page_count = _slice_pdf(p, req.start_page, req.end_page, req.max_pages)
+
+    await _ensure_warmup()                  # 엔진 로드 보장(내부에서 _engine_lock 잡았다 놓음)
+    async with _engine_lock:                # 실제 변환 직렬화(단일 GPU)
+        start = time.monotonic()
+        try:
+            md_text, raw_images = await _run_mineru(pdf_bytes, MINERU_LANG)
+        except HTTPException:
+            raise
+        except Exception as exc:
+            logger.exception(f"[mineru-service] conversion failed path={p}: {exc}")
+            raise HTTPException(422, detail={"code": "conversion_failed",
+                "message": f"{type(exc).__name__}: {exc}"}) from exc
+        elapsed_ms = int((time.monotonic() - start) * 1000)
+
+    images_payload, truncated = _serialize_images(raw_images, str(p))
+
+    return ConvertResponse(
+        md_content=md_text,
+        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
+        engine="mineru",
+        engine_version=_engine_version,
+        elapsed_ms=elapsed_ms,
+        raw_metrics={
+            "page_count": page_count,
+            "image_count_extracted": len(raw_images),
+            "image_count_returned": len(images_payload),
+        },
+        images=images_payload,
+        images_truncated=truncated,
+    )
@@ -0,0 +1,107 @@
+"""ASME 절(clause) 타이핑 + 라벨 정제 단위테스트 (A-1 / C-4, presegment-multigranularity).
+
+핵심 불변식:
+  - (A-1) ATX heading 의 제목이 ASME 절 식별자(UG-79·PG-27.4.1·UW-11·A-69 …)면 node_type='clause'.
+    builder 가 과거 ATX 를 무조건 node_type=None 으로 반환해 ASME 절이 'clause' 로 안 잡히던 것을 고침.
+  - (C-4) marker LaTeX/markdown/페이지번호 아티팩트('$\\textbf{PG-20.1 …}', '(25) **A-69**')를 절번호
+    매칭 전에 정제 — 정제 없으면 패턴이 노이즈에 막혀 매칭 0.
+  - (A-2) 큰 절(>LEAF_HARD_MAX)은 기존 window-split 로직으로 자동 'clause_split' 이 됨
+    (char_start 보존 = 단일 점프 타깃). 추가 코드 없이 타이핑만으로 확보.
+  - (무회귀) 한국 법령 제N조(_KO_JO 경로)·일반 ATX 헤딩은 영향 없음(정제 inert, 타이핑 None 유지).
+
+pytest + 단독 실행 양쪽 지원:
+  PYTHONPATH=. python3 tests/hier_decomp/test_asme_clause.py
+"""
+from __future__ import annotations
+
+try:  # pytest 경로 (앱 패키지)
+    from app.services.hier_decomp.builder import _detect_heading, _clean_label, build_hier_tree
+except Exception:  # 단독 실행 (앱 deps 없이 builder.py 직접 로드 — stdlib only)
+    import importlib.util
+    import pathlib
+    import sys
+
+    _bp = pathlib.Path(__file__).resolve().parents[2] / "app/services/hier_decomp/builder.py"
+    _spec = importlib.util.spec_from_file_location("_hier_builder_t", _bp)
+    _m = importlib.util.module_from_spec(_spec)
+    sys.modules[_spec.name] = _m  # dataclass __module__ 해소
+    _spec.loader.exec_module(_m)
+    _detect_heading, _clean_label, build_hier_tree = _m._detect_heading, _m._clean_label, _m.build_hier_tree
+
+
+# 5180/5210 실데이터에서 뽑은 noisy 라벨 (marker LaTeX/markdown/페이지번호 범벅).
+ASME_NOISY = [
+    (r"# $\textbf{PG-20.1 Carbon and Carbon-Molybdenum Tube and} \hspace{0.2cm} \textbf{(25)}$", "PG-20.1"),
+    ("# (25) **A-69**", "A-69"),
+    ("# (25) PFT-14 GENERAL", "PFT-14"),
+    ("## (25) PG-27.4.1", "PG-27.4.1"),
+    ("### UG-79 Forming of Pressure Parts", "UG-79"),
+    ("# UW-11 Radiographic Examination", "UW-11"),
+    ("#### UCS-56 Requirements for Postweld Heat Treatment", "UCS-56"),
+]
+
+
+def test_asme_clause_typed_and_cleaned():
+    for line, head in ASME_NOISY:
+        r = _detect_heading(line)
+        assert r is not None, f"미탐지: {line!r}"
+        _lvl, title, nt = r
+        assert nt == "clause", f"{line!r} → node_type={nt} (clause 여야)"
+        assert title.startswith(head), f"{line!r} → 정제 라벨 {title!r} 가 {head!r} 로 시작 안 함"
+        assert "\\textbf" not in title and "$" not in title and "**" not in title, f"라벨에 아티팩트 잔류: {title!r}"
+
+
+def test_clean_label_strips_artifacts():
+    assert _clean_label(r"$\textbf{PG-20.1 Foo} \hspace{0.2cm} \textbf{(25)}$").startswith("PG-20.1 Foo")
+    assert _clean_label("(25) **A-69**") == "A-69"
+    assert _clean_label("(25) PFT-14 GENERAL") == "PFT-14 GENERAL"
+
+
+def test_korean_jo_unaffected():
+    # 한국 법령 제N조 = _KO_JO 경로(ATX 아님) → clause 유지, _clean_label 미적용·inert.
+    r = _detect_heading("제3조(정의) 이 규칙에서 사용하는 용어의 뜻은")
+    assert r is not None and r[2] == "clause" and "제3조" in r[1], r
+    assert _clean_label("제3조(정의)") == "제3조(정의)"  # 노이즈 없음 → inert(무회귀)
+
+
+def test_plain_atx_not_clause():
+    # ASME 절 식별자가 아닌 일반 ATX 헤딩은 node_type None 유지 + 라벨 무변.
+    for line, want in [("# Introduction", "Introduction"), ("## Overview of Methods", "Overview of Methods")]:
+        r = _detect_heading(line)
+        assert r is not None and r[2] is None and r[1] == want, r
+
+
+def test_large_clause_becomes_clause_split():
+    # A-2: 큰 절(>5000자) → 기존 window-split 이 'clause' 를 'clause_split' 로(char_start 보존=점프 타깃) + window 자식.
+    big = "# UG-22 Loadings\n\n" + ("This is a body paragraph describing loadings in detail. " * 30 + "\n\n") * 8
+    nodes = build_hier_tree(big)
+    splits = [n for n in nodes if n.node_type == "clause_split"]
+    assert splits, f"clause_split 없음: {[n.node_type for n in nodes]}"
+    assert all(n.char_start is not None for n in splits), "clause_split char_start(점프 타깃) 유실"
+    assert any(n.node_type == "window" for n in nodes), "window 자식 없음"
+
+
+def test_typing_ratio_sample():
+    # V-1 스타일: 4 ASME 절 + 1 일반 → clause 4개만.
+    md = "\n\n".join(f"# {x}\n\nbody for {x} here.\n"
+                     for x in ["UG-1 Scope", "UG-79 Forming", "PG-5 Service", "Introduction", "UW-11 RT"])
+    clauses = [n for n in build_hier_tree(md) if n.node_type in ("clause", "clause_split")]
+    assert len(clauses) == 4, [n.section_title for n in clauses]
+
+
+if __name__ == "__main__":
+    import sys
+    import traceback
+
+    fns = [(k, v) for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
+    failed = 0
+    for name, fn in fns:
+        try:
+            fn()
+            print(f"  PASS {name}")
+        except Exception:
+            failed += 1
+            print(f"  FAIL {name}")
+            traceback.print_exc()
+    print(f"\n{len(fns) - failed}/{len(fns)} passed")
+    sys.exit(1 if failed else 0)
@@ -67,7 +67,9 @@ def test_atx_part_and_item_still_detected():
    assert lvl == 1 and nt is None, r  # ATX = level(# 수), node_type None
    assert title.startswith("PART PG")
    r2 = _detect_heading("#### PG-1 SCOPE")
-    assert r2 is not None and r2[0] == 4 and r2[2] is None, r2
+    # A-1(asme-clause): ASME 절 식별자(PG-1) 는 이제 node_type='clause' 로 타이핑된다(과거 None).
+    # ATX 탐지·level(# 수) 보존은 그대로 — 변경은 타이핑 한정.
+    assert r2 is not None and r2[0] == 4 and r2[2] == "clause", r2


 def test_build_hier_tree_drops_false_part_section():
@@ -37,7 +37,8 @@ from services.search.freshness_decay import (
 NOW = datetime(2026, 5, 3, 12, 0, 0, tzinfo=timezone.utc)


-def _meta(channel: str | None, *, days_ago: float | None = 30.0, origin: str | None = None) -> _DocMeta:
+def _meta(channel: str | None, *, days_ago: float | None = 30.0, origin: str | None = None,
+          material_type: str | None = None) -> _DocMeta:
    if days_ago is None:
        created = None
    elif days_ago < 0:
@@ -45,7 +46,8 @@ def _meta(channel: str | None, *, days_ago: float | None = 30.0, origin: str | N
        created = NOW + timedelta(days=-days_ago)
    else:
        created = NOW - timedelta(days=days_ago)
-    return _DocMeta(source_channel=channel, content_origin=origin, created_at=created)
+    return _DocMeta(source_channel=channel, content_origin=origin, created_at=created,
+                    material_type=material_type)


 # ─── policy dispatcher ────────────────────────────────────────────
@@ -55,8 +57,15 @@ def test_policy_news():
    assert freshness_policy(_meta("news")) == "news_90d"


-def test_policy_law_monitor():
-    assert freshness_policy(_meta("law_monitor")) == "law_365d"
+def test_policy_law_monitor_now_unaffected():
+    # C-1 후속: law_365d 폐기 → law_monitor 비적용 (현행성은 version_status 가 처리)
+    assert freshness_policy(_meta("law_monitor")) is None
+
+
+def test_policy_incident():
+    # C-1 후속: 재해사례/사망사고(material_type='incident') → news_90d 흡수 (source 무관)
+    assert freshness_policy(_meta("crawl", material_type="incident")) == "news_90d"
+    assert freshness_policy(_meta("inbox_route", material_type="incident")) == "news_90d"


 def test_policy_manual_unaffected():
@@ -123,8 +132,9 @@ def test_decay_at_half_life_news():
    assert compute_decay(90.0, "news_90d") == pytest.approx(0.5, rel=1e-6)


-def test_decay_at_half_life_law():
-    assert compute_decay(365.0, "law_365d") == pytest.approx(0.5, rel=1e-6)
+def test_decay_law_365d_removed_returns_one():
+    # C-1 후속: law_365d 폐기 → HALF_LIFE_DAYS 미등록 policy → decay 1.0 (no-op)
+    assert compute_decay(365.0, "law_365d") == 1.0


 def test_decay_age_zero_full():
@@ -212,22 +222,38 @@ async def test_apply_news_recent_vs_old_recent_higher():


@pytest.mark.asyncio
-async def test_apply_law_monitor_recent_vs_old_recent_higher():
-    # 가드 2: law_monitor recent 가 위
+async def test_apply_law_monitor_now_unaffected():
+    # C-1 후속: law_monitor freshness 폐기 → recent/old 동일 score (재정렬 없음)
    base = 0.50
    rows = [
        {"id": 1, "source_channel": "law_monitor", "content_origin": "extracted",
-         "created_at": NOW - timedelta(days=10)},
+         "material_type": "law", "created_at": NOW - timedelta(days=10)},
        {"id": 2, "source_channel": "law_monitor", "content_origin": "extracted",
-         "created_at": NOW - timedelta(days=730)},  # 2년
+         "material_type": "law", "created_at": NOW - timedelta(days=730)},
+    ]
+    session = _MockSession(rows)
+    results = [_result(1, base), _result(2, base)]
+    out = await apply_freshness_decay(results, session, now=NOW)
+    assert out[0].score == base and out[1].score == base
+    assert out[0].freshness_debug["freshness_policy"] is None
+
+
+@pytest.mark.asyncio
+async def test_apply_incident_recent_vs_old_recent_higher():
+    # C-1 후속: 재해사례(incident) recent 가 위 (news_90d 흡수, source_channel='crawl')
+    base = 0.50
+    rows = [
+        {"id": 1, "source_channel": "crawl", "content_origin": "extracted",
+         "material_type": "incident", "created_at": NOW - timedelta(days=5)},
+        {"id": 2, "source_channel": "crawl", "content_origin": "extracted",
+         "material_type": "incident", "created_at": NOW - timedelta(days=400)},
    ]
    session = _MockSession(rows)
    results = [_result(1, base), _result(2, base)]
    out = await apply_freshness_decay(results, session, now=NOW)
    assert out[0].id == 1
-    assert out[0].freshness_debug["freshness_policy"] == "law_365d"
-    # 2년 → law_365d 반감기 1년 → decay ~0.25 → multiplier ~ 0.775
-    assert out[1].score < out[0].score
+    assert out[0].score > out[1].score
+    assert out[0].freshness_debug["freshness_policy"] == "news_90d"


@pytest.mark.asyncio
@@ -0,0 +1,400 @@
+"""PR-G2-3 — presegment LLM 경계 폴백 단위 테스트.
+
+scaffold-first 안전성 박제:
+  (a) parse_json_response + SegmentationOutput 가 대표 fixture(ToC-less 120p → 3 segments) 검증
+  (b) 검증 게이트(_is_clear_bundle)가 정상 응답 수락 / 비정상(중첩·gap·tiny child·N>MAX) 거부
+  (c) flag OFF(기본) → LLM 절대 호출 안 함(call_deep count==0), flag ON → 호출됨(positive control)
+
+DB·PyMuPDF 불요(unit) — AsyncSession 은 최소 fake, fitz 는 sys.modules 주입 fake.
+라이브 LLM 호출 없음(call_deep 는 fixture 반환 monkeypatch). worker-process 레벨 E2E(실 PDF
+번들 분할, 보류 백오프 DB 기록)는 GPU 라이브 게이트에서 별도 실측.
+[[feedback_external_api_fixture_first]] / [[feedback_scaffold_first_for_external_cost_pr]]
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent / "app"))
+
+from ai.client import parse_json_response  # noqa: E402
+import workers.presegment_worker as pw  # noqa: E402
+from workers.presegment_worker import (  # noqa: E402
+    SegmentationOutput,
+    _is_clear_bundle,
+    _segments_from_output,
+)
+
+# ─── 대표 fixture: ToC-less 120p 번들 → 3 segments (1-based inclusive, 전범위·무중첩) ───
+GOOD_LLM_JSON = json.dumps(
+    {
+        "is_bundle": True,
+        "segments": [
+            {"start_page": 1, "end_page": 40, "title": "문서 A"},
+            {"start_page": 41, "end_page": 85, "title": "문서 B"},
+            {"start_page": 86, "end_page": 120, "title": "문서 C"},
+        ],
+        "confidence": 0.82,
+    },
+    ensure_ascii=False,
+)
+
+PAGE_COUNT = 120
+
+
+# ─── (a) parse_json_response + SegmentationOutput 검증 ──────────────────────
+
+
+def test_parse_and_validate_good_fixture():
+    parsed = parse_json_response(GOOD_LLM_JSON)
+    assert parsed is not None
+    out = SegmentationOutput.model_validate(parsed)
+    assert out.is_bundle is True
+    assert len(out.segments) == 3
+    assert out.segments[0].start_page == 1
+    assert out.segments[-1].end_page == PAGE_COUNT
+    assert out.confidence == pytest.approx(0.82)
+
+
+def test_parse_tolerates_think_and_fence():
+    """house parse_json_response 가 <think> + ```json fence 를 벗겨낸다."""
+    wrapped = f"<think>분석중...</think>\n```json\n{GOOD_LLM_JSON}\n```"
+    parsed = parse_json_response(wrapped)
+    out = SegmentationOutput.model_validate(parsed)
+    assert out.is_bundle is True and len(out.segments) == 3
+
+
+# ─── (b) 검증 게이트 accept / reject ────────────────────────────────────────
+
+
+def _segments(*spans):
+    return [{"start_page": s, "end_page": e, "title": ""} for (s, e) in spans]
+
+
+def test_gate_accepts_good():
+    out = SegmentationOutput.model_validate(parse_json_response(GOOD_LLM_JSON))
+    segs = _segments_from_output(out)
+    clear, reason = _is_clear_bundle(segs, PAGE_COUNT)
+    assert clear is True, reason
+    assert reason == ""
+
+
+def test_gate_rejects_overlap():
+    # 41 이어야 할 두번째 start 가 40 으로 중첩
+    clear, reason = _is_clear_bundle(_segments((1, 40), (40, 85), (86, 120)), PAGE_COUNT)
+    assert clear is False
+    assert "non_contiguous" in reason
+
+
+def test_gate_rejects_gap():
+    # 40 다음이 42 로 시작 → 41 빈틈 (non_contiguous 로 검출)
+    clear, reason = _is_clear_bundle(_segments((1, 40), (42, 85), (86, 120)), PAGE_COUNT)
+    assert clear is False
+    assert "non_contiguous" in reason
+
+
+def test_gate_rejects_tiny_child():
+    # 두번째 자식 41..43 = 3p < MIN_CHILD_PAGES(5)
+    clear, reason = _is_clear_bundle(_segments((1, 40), (41, 43), (44, 120)), PAGE_COUNT)
+    assert clear is False
+    assert "child_too_small" in reason
+
+
+def test_gate_rejects_coverage_not_full():
+    # 마지막이 page_count 에 못 미침
+    clear, reason = _is_clear_bundle(_segments((1, 40), (41, 85), (86, 110)), PAGE_COUNT)
+    assert clear is False
+    assert "last_end_not_page_count" in reason
+
+
+def test_gate_rejects_too_many_children():
+    # N > MAX_CHILDREN — 각 자식 MIN_CHILD_PAGES 만족시키되 개수만 초과
+    n = pw.MAX_CHILDREN + 1
+    pc = n * pw.MIN_CHILD_PAGES
+    spans = [
+        (i * pw.MIN_CHILD_PAGES + 1, (i + 1) * pw.MIN_CHILD_PAGES) for i in range(n)
+    ]
+    clear, reason = _is_clear_bundle(_segments(*spans), pc)
+    assert clear is False
+    assert "too_many_children" in reason
+
+
+def test_gate_rejects_single_segment():
+    clear, reason = _is_clear_bundle(_segments((1, 120)), PAGE_COUNT)
+    assert clear is False
+    assert "too_few_level1_entries" in reason
+
+
+# ─── 공통 fake (DB / PyMuPDF) ──────────────────────────────────────────────
+
+
+class _FakeDoc:
+    """presegment 가 읽는 Document 필드만 가진 최소 stand-in."""
+
+    def __init__(self, doc_id=1):
+        self.id = doc_id
+        self.file_path = "PKM/bundle.pdf"
+        self.file_hash = "deadbeef"
+        self.file_format = "pdf"
+        self.file_size = 123
+        self.file_type = "document"
+        self.import_source = "upload"
+        self.original_filename = "bundle.pdf"
+        self.source_channel = None
+        self.category = None
+        self.data_origin = None
+        self.doc_purpose = None
+        self.material_type = None
+        self.jurisdiction = None
+        self.title = "번들"
+        self.presegment_role = None
+        self.bundle_page_start = None
+        self.bundle_page_end = None
+        self.extracted_at = None
+        self.extracted_text = None
+
+
+class _ScalarResult:
+    def __init__(self, rows):
+        self._rows = rows
+
+    def scalars(self):
+        return self
+
+    def all(self):
+        return list(self._rows)
+
+
+class _FakeSession:
+    """_create_children / process 가 쓰는 AsyncSession 표면만 구현.
+
+    execute() = 기존 자식 lineage 조회 → 빈 결과(첫 분할). add/flush 로 child.id 부여.
+    get() = document_id → 미리 등록한 doc, child_id → 생성된 child.
+    """
+
+    def __init__(self, doc):
+        self._docs = {doc.id: doc}
+        self.added = []
+        self.commits = 0
+        self.enqueued = []  # enqueue_stage monkeypatch 가 채움
+        self._next_id = 1000
+
+    async def get(self, _model, oid):
+        return self._docs.get(oid)
+
+    async def execute(self, _stmt):
+        # _create_children 의 기존 자식 조회 → 항상 빈(첫 분할). enqueue_stage 는 monkeypatch.
+        return _ScalarResult([])
+
+    def add(self, obj):
+        self.added.append(obj)
+        # child Document 에 id 부여 (flush 대용 — _FakeDoc/실 Document 모두 setattr 가능)
+        if getattr(obj, "id", None) is None and hasattr(obj, "presegment_role"):
+            self._next_id += 1
+            obj.id = self._next_id
+            self._docs[obj.id] = obj
+
+    async def flush(self):
+        for obj in self.added:
+            if getattr(obj, "id", None) is None and hasattr(obj, "presegment_role"):
+                self._next_id += 1
+                obj.id = self._next_id
+                self._docs[obj.id] = obj
+
+    async def commit(self):
+        self.commits += 1
+
+
+def _install_fake_fitz(monkeypatch, *, page_count=PAGE_COUNT, toc=None, first_lines=None):
+    """sys.modules['fitz'] 에 fake 주입 — worker 의 `import fitz` 가 이걸 받게 한다."""
+    toc = toc or []
+
+    class _FakePage:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def get_text(self):
+            if first_lines and self._idx < len(first_lines):
+                return first_lines[self._idx]
+            return f"page {self._idx + 1} body text"
+
+    class _FakePdf:
+        def __init__(self):
+            self.page_count = page_count
+
+        def get_toc(self, simple=True):
+            return list(toc)
+
+        def __getitem__(self, idx):
+            return _FakePage(idx)
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *exc):
+            return False
+
+    fake = types.ModuleType("fitz")
+    fake.open = lambda *_a, **_k: _FakePdf()
+    monkeypatch.setitem(sys.modules, "fitz", fake)
+    return fake
+
+
+class _SpyClient:
+    """AIClient stand-in — call_deep 호출 횟수 카운트 + 지정 응답 반환."""
+
+    calls = 0
+    response = GOOD_LLM_JSON
+
+    def __init__(self):
+        type(self).calls += 1  # 인스턴스화 자체는 비용 아님 — 호출 카운트는 call_deep 기준
+
+    async def call_deep(self, prompt, system=None):
+        type(self)._deep_calls += 1
+        return type(self).response
+
+    async def close(self):
+        pass
+
+
+@pytest.fixture(autouse=True)
+def _reset_spy():
+    _SpyClient.calls = 0
+    _SpyClient._deep_calls = 0
+    _SpyClient.response = GOOD_LLM_JSON
+    yield
+
+
+# ─── (b) _llm_boundary_fallback 수락/거부 (mocked LLM) ──────────────────────
+
+
+@pytest.mark.asyncio
+async def test_fallback_accepts_good_and_creates_children(monkeypatch):
+    """정상 LLM 응답 → 게이트 통과 → _create_children 가 3 자식 + parent 표식."""
+    _install_fake_fitz(monkeypatch)
+    monkeypatch.setattr(pw, "AIClient", _SpyClient)
+    # enqueue_stage 는 DB 의존 — no-op 으로 대체 (호출 인자만 기록)
+    enq = []
+
+    async def _fake_enqueue(session, doc_id, stage, **kw):
+        enq.append((doc_id, stage))
+        return True
+
+    monkeypatch.setattr(pw, "enqueue_stage", _fake_enqueue)
+
+    doc = _FakeDoc()
+    session = _FakeSession(doc)
+    ok = await pw._llm_boundary_fallback(doc, Path("/tmp/bundle.pdf"), PAGE_COUNT, session)
+
+    assert ok is True
+    assert _SpyClient._deep_calls == 1
+    # 자식 3개 생성 + parent 표식 + lineage 3 + commit
+    children = [o for o in session.added if getattr(o, "presegment_role", None) == "child"]
+    assert len(children) == 3
+    assert doc.presegment_role == "parent"
+    assert sum(1 for o in session.added if o.__class__.__name__ == "DocumentLineage") == 3
+    assert {s for (_id, s) in enq} == {"extract"}
+
+
+@pytest.mark.asyncio
+async def test_fallback_rejects_bad_segments(monkeypatch):
+    """LLM 이 중첩 경계 반환 → 게이트 거부 → False + 자식 0 (단일문서)."""
+    _install_fake_fitz(monkeypatch)
+    bad = json.dumps({
+        "is_bundle": True,
+        "segments": [
+            {"start_page": 1, "end_page": 40},
+            {"start_page": 40, "end_page": 85},   # 중첩
+            {"start_page": 86, "end_page": 120},
+        ],
+    })
+    _SpyClient.response = bad
+    monkeypatch.setattr(pw, "AIClient", _SpyClient)
+
+    async def _fake_enqueue(*a, **k):
+        return True
+
+    monkeypatch.setattr(pw, "enqueue_stage", _fake_enqueue)
+
+    doc = _FakeDoc()
+    session = _FakeSession(doc)
+    ok = await pw._llm_boundary_fallback(doc, Path("/tmp/b.pdf"), PAGE_COUNT, session)
+
+    assert ok is False
+    assert _SpyClient._deep_calls == 1
+    assert [o for o in session.added if getattr(o, "presegment_role", None) == "child"] == []
+    assert doc.presegment_role is None
+
+
+@pytest.mark.asyncio
+async def test_fallback_rejects_is_bundle_false(monkeypatch):
+    """is_bundle=false → 호출은 했으나 분할 안 함(False, 자식 0)."""
+    _install_fake_fitz(monkeypatch)
+    _SpyClient.response = json.dumps({"is_bundle": False, "segments": []})
+    monkeypatch.setattr(pw, "AIClient", _SpyClient)
+
+    async def _fake_enqueue(*a, **k):
+        return True
+
+    monkeypatch.setattr(pw, "enqueue_stage", _fake_enqueue)
+
+    doc = _FakeDoc()
+    session = _FakeSession(doc)
+    ok = await pw._llm_boundary_fallback(doc, Path("/tmp/b.pdf"), PAGE_COUNT, session)
+    assert ok is False
+    assert _SpyClient._deep_calls == 1
+    assert doc.presegment_role is None
+
+
+# ─── (c) flag gating — OFF=호출 0 (deployed default 무변), ON=호출됨 ───────────
+
+
+@pytest.mark.asyncio
+async def test_flag_off_never_calls_llm(monkeypatch):
+    """PRESEGMENT_LLM_FALLBACK=False(기본) → 큰 ToC-less PDF 도 LLM 미호출 = 오늘과 동일."""
+    monkeypatch.setattr(pw, "PRESEGMENT_LLM_FALLBACK", False)
+    _install_fake_fitz(monkeypatch, page_count=120, toc=[])  # 대형 + level-1 ToC 없음 = 애매
+    monkeypatch.setattr(pw, "AIClient", _SpyClient)
+    monkeypatch.setattr(pw, "_resolve_path", lambda raw: Path("/tmp/bundle.pdf"))
+
+    async def _fake_enqueue(*a, **k):
+        return True
+
+    monkeypatch.setattr(pw, "enqueue_stage", _fake_enqueue)
+
+    doc = _FakeDoc()
+    session = _FakeSession(doc)
+    await pw.process(doc.id, session)
+
+    assert _SpyClient._deep_calls == 0          # ★ LLM 절대 호출 안 됨
+    assert doc.presegment_role is None          # 단일문서 (분할 안 함)
+    assert session.commits == 0
+
+
+@pytest.mark.asyncio
+async def test_flag_on_calls_llm_and_splits(monkeypatch):
+    """positive control — flag ON 이면 같은 입력에 LLM 호출 + 게이트 통과 시 분할."""
+    monkeypatch.setattr(pw, "PRESEGMENT_LLM_FALLBACK", True)
+    _install_fake_fitz(monkeypatch, page_count=120, toc=[])
+    _SpyClient.response = GOOD_LLM_JSON
+    monkeypatch.setattr(pw, "AIClient", _SpyClient)
+    monkeypatch.setattr(pw, "_resolve_path", lambda raw: Path("/tmp/bundle.pdf"))
+
+    async def _fake_enqueue(*a, **k):
+        return True
+
+    monkeypatch.setattr(pw, "enqueue_stage", _fake_enqueue)
+
+    doc = _FakeDoc()
+    session = _FakeSession(doc)
+    await pw.process(doc.id, session)
+
+    assert _SpyClient._deep_calls == 1          # LLM 호출됨
+    assert doc.presegment_role == "parent"      # 분할 수행
+    children = [o for o in session.added if getattr(o, "presegment_role", None) == "child"]
+    assert len(children) == 3