feat(docpage): D3 절 구조 탐색기 — 슬림 인사이트 레일 + 절 트리 (frontend only)

문서 상세 /documents/[id] 재구성 (BE 무변경): - 우측 탭(정보/AI/관리) → 슬림 전역 인사이트 레일: 요약·심층·불일치를 탭 게이트 없이 상시 노출(details open, 모바일은 접기 가능), 정보/관리는 접이. → 가공 자료가 탭/온디맨드에 묻히던 IA 문제(G1) 해소. - SectionOutline 절 목차 레벨 기반 들여쓰기(평탄→트리 모양). - 모바일: 본문 메인 + 절목차/인사이트/정보/관리 접이 + 절 탭 본문 이동(기존 구조 활용). 관련 문서(See Also)는 v1 제외(자리만 유지). 심화 목업 = comparisons/2026-06-13-ds-docpage-d3-deepened.html. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Merge feat/safety-library-a1 (C-1 후속 version_status+facets) into ds-board-merged
2026-06-13 15:18:15 +09:00 · 2026-06-13 15:08:24 +09:00 · 2026-06-13 15:07:57 +09:00 · 2026-06-13 14:53:34 +09:00 · 2026-06-13 14:52:04 +09:00 · 2026-06-13 14:34:42 +09:00
181 changed files with 15401 additions and 515 deletions
@@ -9,7 +9,23 @@
 }

 http://document.hyungi.net {
-    encode gzip
+    # 명시 Content-Type match — 기본 match 의 text/* 는 text/event-stream 까지 포함해
+    # SSE(/api/eid/chat)의 첫 ~512B 를 gzip 버퍼링함. SSE 제외, 기존 압축 대상은 보존.
+    # (응답 매처는 header <필드> <값> 한 쌍씩 — 여러 줄 = OR. 한 줄 다중 값은 파싱 에러)
+    encode {
+        gzip
+        match {
+            header Content-Type text/html*
+            header Content-Type text/css*
+            header Content-Type text/plain*
+            header Content-Type text/xml*
+            header Content-Type text/javascript*
+            header Content-Type application/json*
+            header Content-Type application/javascript*
+            header Content-Type application/xml*
+            header Content-Type image/svg+xml*
+        }
+    }

    # API + 문서 → FastAPI
    handle /api/* {
@@ -134,6 +134,49 @@ def _fix_json_string_escapes(s: str) -> str:
        i += 1
    return "".join(out)

+def is_deferrable_error(exc: Exception) -> bool:
+    """Classify whether a failed deep (MacBook M5 Max) call should be deferred.
+
+    Decides if a failure maps to "deferred" (StageDeferred) handling
+    (ds-macbook-offload-1).
+
+    Deferrable means "the MacBook is temporarily unavailable":
+      - HTTP 503 (router upstream_cold / editor_busy / warming — part of the
+        no-silent-fallback contract)
+      - HTTP 502/504 (the router converts upstream connection failures and
+        mid-generation disconnects into 502 — four call sites observed in
+        llm_router.py; in the via-router topology a MacBook sleep disconnect
+        surfaces this way)
+      - the whole httpx.TransportError family (ConnectError, ReadError,
+        RemoteProtocolError, ConnectTimeout, ReadTimeout, ...) — the router
+        itself is unavailable or the DS<->router link was cut.
+    Everything else (400/500, parse/validation errors, ...) is not deferrable
+    and stays on the caller's existing failure path.
+
+    Args:
+        exc: The exception raised by the deep call.
+
+    Returns:
+        True for an httpx.HTTPStatusError whose response status is 502, 503 or
+        504, or for any httpx.TransportError; False otherwise.
+    """
+    if isinstance(exc, httpx.HTTPStatusError):
+        return exc.response.status_code in (502, 503, 504)
+    return isinstance(exc, httpx.TransportError)
+
+
+async def call_deep_or_defer(
+    client: "AIClient",
+    prompt: str,
+    system: str | None = None,
+    cfg: "AIModelConfig | None" = None,
+) -> str:
+    """Run a deep call and convert "MacBook unavailable" failures to StageDeferred.
+
+    Shared by deep_summary_worker, summarize_worker (drain) and
+    classify_worker (drain). Per the original note, queue_consumer/queue_drain
+    handle StageDeferred without consuming attempts and with a deferred_until
+    backoff (the sleep-safety invariant); that handling lives outside this
+    block.
+
+    Args:
+        client: AI client used to issue the request.
+        prompt: Prompt text forwarded to the model.
+        system: Optional system prompt, forwarded unchanged.
+        cfg: When given, the request goes through client._request with this
+            config instead of client.call_deep (classify drain — a variant
+            that uses the deep slot's endpoint but applies triage's
+            temperature/max_tokens).
+
+    Returns:
+        The string returned by the underlying client call.
+
+    Raises:
+        StageDeferred: When the failure satisfies is_deferrable_error(); the
+            original exception is chained as the cause.
+        Exception: Any non-deferrable failure is re-raised unchanged.
+    """
+    # Function-scope import — presumably to avoid an import cycle with
+    # models.queue (TODO confirm).
+    from models.queue import StageDeferred
+
+    try:
+        if cfg is not None:
+            return await client._request(cfg, prompt, system=system)
+        return await client.call_deep(prompt, system=system)
+    except Exception as exc:
+        # Only "MacBook temporarily unavailable" signals become a deferral;
+        # the message records the concrete exception class name.
+        if is_deferrable_error(exc):
+            raise StageDeferred(f"macbook_unavailable:{type(exc).__name__}") from exc
+        raise
+
+
 # 프롬프트 로딩
 PROMPTS_DIR = Path(__file__).parent.parent / "prompts"

@@ -185,22 +228,37 @@ class AIClient:
        """triage/primary 실패 시 최후 방어선. Claude Sonnet 4 API (config.yaml ai.models.fallback) — PR #20 이후 swap 완료."""
        return await self._request(self.ai.fallback, prompt)

+    async def call_deep(self, prompt: str, system: str | None = None) -> str:
+        """Deep-only call — MacBook M5 Max Qwen3.6-27B (config.yaml ai.models.deep, ds-macbook-offload-1).
+
+        Goes through llm-router :8890 (model=qwen-macbook alias) so the router's
+        wake preflight (~24s) and editor_busy guard are reused. It is unrelated
+        to the Mac mini mlx gate (that gate exists to protect the Mac mini), so
+        the call is made without a gate. There is no automatic cloud/Mac mini
+        fallback — failures propagate as-is and the caller decides about
+        deferral via is_deferrable_error(). When the deep slot is absent the
+        primary config is used instead (defensive — callers usually branch on
+        slot presence first).
+
+        Args:
+            prompt: Prompt text forwarded to the model.
+            system: Optional system prompt, forwarded unchanged.
+
+        Returns:
+            The string produced by self._request for the chosen config.
+        """
+        # A falsy/missing deep slot falls back to the primary config.
+        cfg = self.ai.deep or self.ai.primary
+        return await self._request(cfg, prompt, system=system)
+
    # ─── Legacy API (classify_worker 교체 시 제거 예정) ───────────────────

-    async def classify(self, text: str) -> dict:
+    async def classify(self, text: str, cfg=None) -> dict:
        """[DEPRECATED] 기존 classify_worker 전용. B-1 에서 summary_triage 로 대체.

        호출부 정리 전 존속. 신규 코드는 call_triage + prompt_render 를 쓸 것.
+        cfg (2026-06-12 fair-share): 지정 시 primary 대신 해당 config 로 호출 —
+        drain classify 가 deep 슬롯(맥북) 경유에 사용. cfg != ai.primary 라
+        _call_chat 의 primary→fallback 자동 전환은 발동하지 않는다 (에러 raw 전파).
        """
        prompt = CLASSIFY_PROMPT.replace("{document_text}", text)
-        response = await self._call_chat(self.ai.primary, prompt)
+        response = await self._call_chat(cfg or self.ai.primary, prompt)
        return response

-    async def summarize(self, text: str, force_premium: bool = False) -> str:
-        """[DEPRECATED] 기존 호출부용. B-1 에서 summary_triage 가 tldr 대체."""
+    async def summarize(self, text: str, force_premium: bool = False, cfg=None) -> str:
+        """[DEPRECATED] 기존 호출부용. B-1 에서 summary_triage 가 tldr 대체. cfg = classify() 와 동일."""
        if force_premium:
            return await self._call_chat(self.ai.premium, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")
-        return await self._call_chat(self.ai.primary, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")
+        return await self._call_chat(cfg or self.ai.primary, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")

    async def embed(self, text: str) -> list[float]:
        """벡터 임베딩 — GPU 서버 전용"""
@@ -244,7 +244,15 @@ async def regenerate(
    user: Annotated[User, Depends(require_admin)],
 ):
    """수동 트리거 — 백그라운드 태스크로 워커 실행 (admin 필요)."""
+    from core.config import settings
    from workers.digest_worker import run

+    # 홀드 중 silent no-op 방지 — 워커 게이트와 동일 조건을 표면에서 명시.
+    if "digest" in settings.pipeline_held_stages:
+        raise HTTPException(
+            status_code=409,
+            detail="global_digest 보류 중 (config.yaml pipeline.held_stages) — 해제 후 재시도",
+        )
+
    asyncio.create_task(run())
    return {"status": "started", "message": "global_digest 워커 백그라운드 실행 시작"}
@@ -210,8 +210,14 @@ class DocumentDetailResponse(DocumentResponse):


 class AcceptSuggestionRequest(BaseModel):
-    """§1 accept-suggestion 요청 body — stale payload / doc 수정 검출."""
+    """§1 accept-suggestion 요청 body — stale payload / doc 수정 검출.
+
+    jurisdiction: 안전 자료실 A-2 — material_type 제안 승인 시 사용자가 지정하는 관할.
+    law 승인은 필수 (기본값 없음 — KR 자동 부여 시 외국 자료가 KR 법령으로 오염되는
+    경로를 차단, plan A-2 계약).
+    """
    expected_source_updated_at: datetime
+    jurisdiction: str | None = None


 class DocumentUpdate(BaseModel):
@@ -537,6 +543,8 @@ async def list_documents(
    category: str | None = Query(None, description="doc_category enum — 지정 시 기본 news/memo 제외 해제"),
    has_suggestion: bool | None = Query(None, description="true: ai_suggestion IS NOT NULL"),
    proposed_category: str | None = Query(None, description="ai_suggestion.proposed_category 필터"),
+    material_type: str | None = Query(None, description="안전 자료실 C-1: 자료유형. 지정 시 기본 exclude 해제"),
+    jurisdiction: str | None = Query(None, description="안전 자료실 C-1: 관할 (KR/US/...)"),
 ):
    """문서 목록 조회 (페이지네이션 + 필터).

@@ -550,6 +558,10 @@ async def list_documents(
    if category:
        # 명시적 카테고리 필터 — 기본 exclude 해제
        query = query.where(Document.category == category)
+    elif material_type:
+        # 안전 자료실 C-1: material_type 지정 = 기본 exclude(news·law_monitor·note) 해제.
+        # 안전 코퍼스 본체(KOSHA 사례·CSB·법령 등)가 전부 note/crawl 채널이라 exclude 면 빈 화면.
+        query = query.where(Document.material_type == material_type)
    else:
        # 기본 목록: 뉴스/메모/법령 제외 (문서함 용도)
        query = query.where(
@@ -558,6 +570,9 @@ async def list_documents(
            Document.file_type != "note",
        )

+    if jurisdiction:
+        query = query.where(Document.jurisdiction == jurisdiction)
+
    if has_suggestion is True:
        query = query.where(Document.ai_suggestion.isnot(None))
    elif has_suggestion is False:
@@ -663,8 +678,9 @@ class SectionItem(BaseModel):
    section_title: str | None = None  # raw 마크다운 포함 — 정제는 프런트(headingPath.ts)
    heading_path: str | None = None   # raw
    level: int | None = None
-    node_type: str | None = None      # window | section_split | null
+    node_type: str | None = None      # window | chapter_split | clause_split | section_split | null
    is_leaf: bool
+    char_start: int | None = None     # md_content 내 heading offset(UTF-16). jump-target 만 값, 그 외 None (Path B)
    section_type: str | None = None
    summary: str | None = None        # status='summarized' 인 분석행에만, 그 외 None
    confidence: float | None = None
@@ -703,12 +719,12 @@ async def get_document_sections(
        await session.execute(
            sql_text(
                """
-                SELECT chunk_id, section_title, heading_path, level, node_type, is_leaf,
+                SELECT chunk_id, section_title, heading_path, level, node_type, is_leaf, char_start,
                       section_type, summary, confidence
                FROM (
                  SELECT DISTINCT ON (c.id)
                         c.id AS chunk_id, c.chunk_index, c.section_title, c.heading_path,
-                         c.level, c.node_type, c.is_leaf,
+                         c.level, c.node_type, c.is_leaf, c.char_start,
                         a.section_type,
                         CASE WHEN a.status = 'summarized' THEN a.summary ELSE NULL END AS summary,
                         a.confidence
@@ -717,7 +733,7 @@ async def get_document_sections(
                         ON a.chunk_id = c.id AND a.status = 'summarized'
                  WHERE c.doc_id = :doc_id
                    AND c.source_type = 'hier_section'
-                    AND c.is_leaf = true
+                    AND (c.is_leaf = true OR c.node_type LIKE '%\\_split' ESCAPE '\\')
                  ORDER BY c.id, a.created_at DESC, a.id DESC
                ) t
                ORDER BY t.chunk_index
@@ -1243,11 +1259,49 @@ async def accept_suggestion(
    # payload 적용
    proposed_category = doc.ai_suggestion.get("proposed_category")
    proposed_path = doc.ai_suggestion.get("proposed_path")
+    # 안전 자료실 A-2 — material_type 제안 (classify 의 document_type 결정적 매핑)
+    proposed_material = doc.ai_suggestion.get("proposed_material_type")

-    if not proposed_category:
-        raise HTTPException(status_code=422, detail="proposed_category 누락된 suggestion")
+    if not proposed_category and not proposed_material:
+        raise HTTPException(
+            status_code=422,
+            detail="proposed_category/proposed_material_type 둘 다 누락된 suggestion",
+        )

-    doc.category = proposed_category
+    if proposed_category:
+        doc.category = proposed_category
+
+    if proposed_material:
+        _MATERIAL_TYPES = {"law", "paper", "book", "incident", "manual", "standard", "guide"}
+        _JURISDICTIONS = {"KR", "US", "EU", "JP", "GB", "INT"}
+        if proposed_material not in _MATERIAL_TYPES:
+            raise HTTPException(
+                status_code=422, detail=f"허용 밖 material_type: {proposed_material}"
+            )
+        jur = body.jurisdiction or doc.ai_suggestion.get("proposed_jurisdiction")
+        if jur is not None and jur not in _JURISDICTIONS:
+            raise HTTPException(status_code=422, detail=f"허용 밖 jurisdiction: {jur}")
+        # law = 국가 필수 입력, 기본값 없음 (plan A-2 — KR 자동 부여 시 외국 법령 오염.
+        # DB CHECK(chk_documents_law_jurisdiction) 도 거부하지만 422 로 명시 안내).
+        if proposed_material == "law" and not jur:
+            raise HTTPException(
+                status_code=422,
+                detail="법령(law) 승인은 jurisdiction 필수 — body.jurisdiction 으로 국가를 지정하세요 (기본값 없음)",
+            )
+        doc.material_type = proposed_material
+        doc.jurisdiction = jur
+        # 미러 동기화 1문 — jurisdiction 부여/정정 시 청크 country 동반 UPDATE
+        # (leg 간 국가 불일치 방지, plan A-2 계약. 단일 지점 = 본 승인 경로).
+        if jur:
+            from sqlalchemy import update as sa_update
+
+            from models.chunk import DocumentChunk
+
+            await session.execute(
+                sa_update(DocumentChunk)
+                .where(DocumentChunk.doc_id == doc.id)
+                .values(country=jur)
+            )

    # user_tags append (중복 방지, normalize + dedup 통과)
    if proposed_path:
@@ -0,0 +1,322 @@
+"""이드 채팅 표면 — POST /api/eid/chat (eid-chat 트랙).
+
+확정 결정:
+  - D-1 경로 = /api/eid/chat (main.py prefix=/api/eid + 본 라우터 POST /chat)
+  - D-2 mode 닫힌 어휘: daily / deep — 둘 다 mac-mini-default (맥북 백지화 2026-06-11,
+    맥미니 Qwen 27B 단일 호스트. deep = ReAct 자동검색 모드 구분). 클라는 mode 만 보냄 —
+    claude-cloud / auto 금지 (Literal 로 422 차단). 게이트 = alias 기준 자동 적용(무게이트 폐지).
+  - D-3 독립 /chat 라우트 (frontend) — 본 모듈은 백엔드 API 만.
+  - D-5 LLM 호출 = EidAIClient.call_stream 한 곳 (이드 egress 봉쇄 불변식 #5,
+    RouterBackend 직접 호출 금지).
+  - D-6 rules.md 부재 = 503 substrate_degraded fail-closed — 다른 표면의 degraded 배너
+    컨벤션(compose._rules)과 달리 채팅은 진행 자체를 거부.
+
+응답 = router SSE 라인 단위 중계 (text/event-stream — call_stream 이 model 필드를 mode
+어휘로 치환·usage 제거, 프레이밍 보존. 본 모듈은 무변형 relay). 스트림 시작 전
+backend 실패는 /api/search/ask 와 동일 shape 의 503 + error_reason 매핑(자동 fallback 0).
+로그는 메타 1줄(mode·턴수·status)만 — 대화 본문 로깅 0.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from collections.abc import AsyncIterator
+from typing import Annotated, Literal
+
+import httpx
+from fastapi import APIRouter, Depends
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field, field_validator, model_validator
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.auth import get_current_user
+from core.database import get_session
+from core.utils import setup_logger
+from eid import compose as eid_compose
+from eid.ai import EidAIClient
+from models.user import User
+from services.llm.backends import BackendUnavailable, _router_url, get_backend
+from services.search import llm_gate
+from services.search.react_loop import agentic_ask_loop
+
+logger = setup_logger("eid_chat")
+
+router = APIRouter()
+
+# ── ds-eid-ask-absorb P1: deep mode = ReAct auto-search (Mac mini Qwen 27B, since 2026-06-11) ──
+# Non-generating reachability probe — it only confirms the router can be reached (coarse).
+# Unavailability of the 27B model itself (MacBook) is covered by BackendUnavailable from the
+# first generate_with_tools call -> mid-stream error envelope (plan: probe precision is not
+# needed; TOCTOU is handled by the in-stream error). ~2s timeouts, no generation slot is held.
+_DEEP_PROBE_TIMEOUT = httpx.Timeout(connect=2.0, read=2.0, write=2.0, pool=2.0)
+# Heartbeat: multiple ReAct tool calls can produce tens of seconds of silence, which would
+# trip a proxy idle timeout. A `{"phase":"ping"}` no-op event is sent instead (the frontend
+# envelope parser skips it naturally — a `: ping` SSE comment is not guaranteed to be handled
+# by a POST SSE fetch parser).
+_HEARTBEAT_INTERVAL_S = 10.0
+
+
+async def _probe_router_reachable() -> bool:
+    """Check router (:8890) reachability with a non-generating GET /v1/models.
+
+    Returns:
+        True only when the request completes with HTTP 200. Any exception or
+        any other status code is treated as "unavailable" and yields False.
+    """
+    url = f"{_router_url().rstrip('/')}/v1/models"
+    try:
+        # Short-lived client bounded by the probe timeout settings.
+        async with httpx.AsyncClient(timeout=_DEEP_PROBE_TIMEOUT) as client:
+            resp = await client.get(url)
+            return resp.status_code == 200
+    except Exception:
+        # Deliberately broad: every failure mode of the probe means "not reachable".
+        return False
+
+
+def _sse(obj: dict) -> bytes:
+    """Encode one SSE event as bytes: data: <json>\\n\\n.
+
+    Callers send final_answer as an OpenAI-compatible choices.delta.content
+    payload and sources/phase under separate envelope keys (the frontend
+    branches on them). No model/usage machine metadata is included.
+
+    Args:
+        obj: JSON-serialisable payload for the event.
+
+    Returns:
+        UTF-8 bytes of the framed event; non-ASCII characters are emitted
+        unescaped (ensure_ascii=False).
+    """
+    return b"data: " + json.dumps(obj, ensure_ascii=False).encode("utf-8") + b"\n\n"
+
+
+class ChatMessage(BaseModel):
+    """One chat turn.
+
+    role=system is outside the Literal and is therefore rejected with 422 —
+    the combined system prompt is injected only by the server-side compose.
+    """
+
+    # Only user/assistant turns are accepted from the client.
+    role: Literal["user", "assistant"]
+    # Per-message bounds: at least 1 and at most 8000 characters.
+    content: str = Field(min_length=1, max_length=8000)
+
+
+# Conversation-wide cap (sum of all message contents) — a total limit separate from the
+# per-message 8000-character and 40-turn limits.
+_TOTAL_CONTENT_CAP = 32000
+
+
+class ChatRequest(BaseModel):
+    """POST /api/eid/chat body.
+
+    mode is a closed vocabulary (D-2); messages holds 1-40 turns whose combined
+    content length may not exceed 32000 characters.
+    """
+
+    # Closed vocabulary — any other value fails validation.
+    mode: Literal["daily", "deep"]
+    # Between 1 and 40 turns per request.
+    messages: list[ChatMessage] = Field(min_length=1, max_length=40)
+
+    @field_validator("messages")
+    @classmethod
+    def _last_turn_is_user(cls, v: list[ChatMessage]) -> list[ChatMessage]:
+        """Reject a conversation whose final turn is not a user message."""
+        if v and v[-1].role != "user":
+            raise ValueError("마지막 메시지는 role=user 여야 합니다")
+        return v
+
+    @model_validator(mode="after")
+    def _total_content_cap(self) -> "ChatRequest":
+        """Enforce the conversation-wide cap on the summed content length."""
+        if sum(len(m.content) for m in self.messages) > _TOTAL_CONTENT_CAP:
+            raise ValueError(
+                "대화 총량 초과 — 새 대화로 시작하거나 입력을 줄여주세요 "
+                f"(전체 메시지 합 {_TOTAL_CONTENT_CAP}자 제한)"
+            )
+        return self
+
+
+@router.get("/status")
+async def eid_status(
+    user: Annotated[User, Depends(get_current_user)],
+):
+    """Snapshot of eid backend occupancy — GET /api/eid/status (lets the UI tell "waiting" from "broken").
+
+    Only the llm_gate occupancy inside the DS process for daily (Mac mini MLX)
+    is inspected — endpoint usage by external consumers (the Mac mini's own
+    derived-worker, Hermes, ...) is not captured. Hence busy=true is certain
+    (there is a queue right now) while busy=false is approximate (external
+    occupancy may still exist).
+
+    Lightweight by design: no DB access, no LLM call, no body logging — safe
+    as a polling target. It must not be used as the basis for automatic
+    fallback decisions (mode switching is by explicit button only, by policy).
+
+    Returns:
+        {"daily": {"busy": bool, "inflight": bool, "waiters": int}}, where
+        busy is true when something is in flight or at least one waiter exists.
+    """
+    snap = llm_gate.gate_status()
+    # Normalise the snapshot values to plain bool/int for the JSON response.
+    inflight = bool(snap["inflight"])
+    waiters = int(snap["waiters"])
+    return {
+        "daily": {
+            "busy": inflight or waiters > 0,
+            "inflight": inflight,
+            "waiters": waiters,
+        }
+    }
+
+
+def _backend_unavailable_response(body: ChatRequest, reason: str, backend_name: str) -> JSONResponse:
+    """Build the 503 returned when the 27B backend is unavailable before streaming starts.
+
+    Uses the same shape as the ask convention; no automatic fallback happens.
+    Only metadata (mode, turn count, status, reason) is logged.
+
+    Args:
+        body: The validated chat request (used for log metadata only).
+        reason: Machine-readable value placed in error_reason.
+        backend_name: Backend identifier placed in backend_requested.
+
+    Returns:
+        A JSONResponse with status 503 and the backend_unavailable payload.
+    """
+    logger.warning(
+        "eid_chat backend_unavailable mode=%s turns=%d status=503 reason=%s",
+        body.mode, len(body.messages), reason,
+    )
+    return JSONResponse(
+        status_code=503,
+        content={
+            "error": "backend_unavailable",
+            "error_reason": reason,
+            "backend_requested": backend_name,
+            "detail": (
+                "심층 엔진(검색)이 일시적으로 응답할 수 없습니다. "
+                "잠시 후 다시 시도하거나 일상 모드로 물어보세요."
+            ),
+        },
+    )
+
+
+async def _eid_chat_deep(body: ChatRequest, session: AsyncSession) -> StreamingResponse | JSONResponse:
+    """Deep mode = ReAct auto-search.
+
+    With ReAct (`tool_choice=auto`) the LLM decides on its own whether to
+    search — questions that need no search early-exit into a conversational
+    answer. The substrate (persona + rules + react_ask task) is injected by
+    compose("react_ask") inside agentic_ask_loop (evidence-first is inherited
+    automatically).
+
+    Multi-turn: in stage 1 only the last user message is processed
+    (agentic_ask_loop takes query: str — no history support). Resolving
+    pronouns in follow-up questions is stage-2 backlog.
+
+    Args:
+        body: Validated chat request; only the last message is used as the query.
+        session: DB session handed to agentic_ask_loop. NOTE(review): it is used
+            inside the streaming generator, i.e. after this function returns —
+            confirm the dependency's lifetime covers the whole stream.
+
+    Returns:
+        A 503 JSONResponse when the router probe fails, otherwise a
+        text/event-stream StreamingResponse.
+    """
+    # (1) Non-generating probe before the first SSE byte (which commits HTTP 200) —
+    # an unreachable router can still be mapped to 503 at this point.
+    if not await _probe_router_reachable():
+        return _backend_unavailable_response(body, "router_unreachable", "mac-mini-default")
+
+    query = body.messages[-1].content  # only the last user turn is processed
+    backend = get_backend("mac-mini-default")
+
+    async def _stream() -> AsyncIterator[bytes]:
+        # (2) Emitting phase:searching commits HTTP 200. Unavailability after this
+        # point cannot become a 503 -> it is reported as an in-stream error.
+        yield _sse({"phase": "searching"})
+        task = asyncio.create_task(agentic_ask_loop(session, query, backend=backend))
+        try:
+            # Heartbeat: while the task is unfinished, send a ping roughly every 10s
+            # (shield keeps the wait_for timeout cancellation from killing the task).
+            while not task.done():
+                try:
+                    await asyncio.wait_for(asyncio.shield(task), timeout=_HEARTBEAT_INTERVAL_S)
+                except asyncio.TimeoutError:
+                    yield _sse({"phase": "ping"})
+            # Fetch the result. A task failure (e.g. BackendUnavailable, mid-stream)
+            # normally surfaces from the shielded wait above; result() re-raises it
+            # if it has not surfaced yet.
+            result = task.result()
+            # final_answer = a single OpenAI-compatible chunk (reuses the frontend's
+            # existing content-accumulation path).
+            yield _sse({"choices": [{"delta": {"content": result.final_answer}}]})
+            # Evidence = separate envelope (no citation numbers — the frontend relies
+            # on order). partial = marker for insufficient evidence.
+            yield _sse({"eid_sources": result.sources, "partial": result.partial})
+            yield b"data: [DONE]\n\n"
+            logger.info(
+                "eid_chat deep ok turns=%d sources=%d partial=%s iters=%d",
+                len(body.messages), len(result.sources), result.partial, result.iterations,
+            )
+        except BackendUnavailable as exc:
+            # Mid-stream unavailability (AC unplugged / lid closed while searching) —
+            # 200 was already sent, so emit an in-stream error envelope.
+            # [DONE] after the error keeps the frontend's sawDone logic from raising a
+            # false "interrupted" alarm (the explicit error notice is kept).
+            logger.warning(
+                "eid_chat deep mid-stream unavailable turns=%d reason=%s",
+                len(body.messages), exc.reason,
+            )
+            yield _sse({"phase": "error", "error_reason": exc.reason})
+            yield b"data: [DONE]\n\n"
+        except asyncio.CancelledError:
+            raise  # client disconnect — the finally block cleans up the task
+        except Exception:
+            logger.exception("eid_chat deep stream failed turns=%d", len(body.messages))
+            yield _sse({"phase": "error", "error_reason": "deep_failed"})
+            yield b"data: [DONE]\n\n"
+        finally:
+            # Prevent an orphaned ReAct task on client disconnect — cancel, then await
+            # so that the cancellation has fully propagated.
+            # Otherwise the 27B model stays busy for minutes on behalf of a closed
+            # connection and, given the router's concurrency, the next search waits.
+            if not task.done():
+                task.cancel()
+            try:
+                await task
+            except (asyncio.CancelledError, Exception):
+                # Best-effort cleanup: the outcome was already reported above.
+                pass
+
+    return StreamingResponse(
+        _stream(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-store", "X-Accel-Buffering": "no"},
+    )
+
+
+@router.post("/chat")
+async def eid_chat(
+    body: ChatRequest,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Eid chat — daily = router SSE pass-through (conversation) / deep = ReAct auto-search (evidence).
+
+    503 paths (none of them falls back automatically):
+      - substrate_degraded: rules.md is missing (D-6 fail-closed, chat is refused)
+      - backend_unavailable: backend failure before the stream starts
+        (common to daily/deep, ask-convention shape)
+
+    Args:
+        body: Validated chat request (mode + messages).
+        user: Authenticated user; required by the dependency, not otherwise read here.
+        session: DB session, passed on to the deep-mode flow only.
+
+    Returns:
+        A 503 JSONResponse for the failure paths above, otherwise a
+        text/event-stream StreamingResponse.
+    """
+    # D-6: missing rules = fail-closed. Chat never proceeds without the safety/policy
+    # guard (no degraded banner here).
+    if not eid_compose.rules_present():
+        logger.error(
+            "eid_chat substrate_degraded mode=%s turns=%d status=503 — rules.md 부재, 채팅 거부",
+            body.mode, len(body.messages),
+        )
+        return JSONResponse(
+            status_code=503,
+            content={
+                "detail": (
+                    "이드 substrate 가 degraded 상태입니다 (운영 규칙 rules.md 부재). "
+                    "복구 전까지 채팅을 진행하지 않습니다."
+                ),
+                "error_reason": "substrate_degraded",
+            },
+        )
+
+    # deep = ReAct auto-search (separate flow — probe + ReAct run converted to SSE)
+    if body.mode == "deep":
+        return await _eid_chat_deep(body, session)
+
+    # daily = plain conversational SSE pass-through (pre-existing behaviour)
+    system = eid_compose.compose("eid_chat", task="")
+    client = EidAIClient()
+    stream = client.call_stream(
+        body.mode, [m.model_dump() for m in body.messages], system,
+    )
+
+    # An async generator only sends the real request on its first __anext__ — to map a
+    # failure before the stream starts (connection/4xx/5xx) to 503, the first chunk is
+    # pulled here ahead of building the response.
+    try:
+        first = await anext(stream, None)
+    except BackendUnavailable as exc:
+        logger.warning(
+            "eid_chat backend_unavailable mode=%s turns=%d status=503 reason=%s",
+            body.mode, len(body.messages), exc.reason,
+        )
+        await client.close()
+        return JSONResponse(
+            status_code=503,
+            content={
+                "error": "backend_unavailable",
+                "error_reason": exc.reason,
+                "backend_requested": exc.backend_name,
+                "detail": (
+                    "선택한 모드의 backend 가 일시적으로 응답할 수 없습니다. "
+                    "잠시 후 다시 시도하거나 mode 를 바꿔 호출하세요."
+                ),
+            },
+        )
+    except BaseException:
+        # Any other failure (including cancellation): release the client, then propagate.
+        await client.close()
+        raise
+
+    # One metadata log line — no body logging (conversation content is stored nowhere).
+    logger.info(
+        "eid_chat stream mode=%s turns=%d status=200", body.mode, len(body.messages)
+    )
+
+    async def _passthrough():
+        # Relay what call_stream emits without modification (sanitising happens in one
+        # place: call_stream, line by line). On cancellation/disconnect too, the finally
+        # block closes the generator -> AsyncExitStack cleans up the upstream.
+        try:
+            try:
+                # Replay the chunk that was pulled early, then the rest of the stream.
+                if first is not None:
+                    yield first
+                async for chunk in stream:
+                    yield chunk
+            except (BackendUnavailable, httpx.HTTPError) as exc:
+                # Cut off after the stream started — status 200 is already sent and
+                # cannot be remapped. Log one metadata line and end quietly (no
+                # traceback propagation) — the frontend handles it via the missing [DONE].
+                logger.warning(
+                    "eid_chat stream aborted mode=%s turns=%d reason=%s",
+                    body.mode, len(body.messages),
+                    getattr(exc, "reason", type(exc).__name__),
+                )
+                return
+        finally:
+            # client.close() is guaranteed even if stream.aclose() raises (nested finally)
+            try:
+                await stream.aclose()
+            finally:
+                await client.close()
+
+    return StreamingResponse(
+        _passthrough(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-store", "X-Accel-Buffering": "no"},
+    )
@@ -0,0 +1,192 @@
+"""처리 머신 보드 API — /api/queue/* (plan ds-processing-ui-6an → ds-board-engines-1).
+
+- GET /overview: 홈 stage 평면 테이블을 "머신 관점 보드(누가 일하나)"로 — 집계
+  로직은 services/queue_overview.py (순수 판정부 분리). 응답 스키마는 FE 와
+  계약 고정. 응답에 raw 모델명 노출 금지 — 머신 label 만 (엔진/모델 표기는
+  FE 정적 맵 책임).
+- GET /failed + POST /retry|/skip: 실패 처리 (ds-board-engines-1) — 영구 실패
+  (자동 재시도 3회 소진)의 유일한 사용자 조치 경로. 일괄 조치는 FE 가 그룹의
+  id 목록을 모아 보낸다 (서버측 패턴 매칭 없음 — raw 식별자/패턴 미수신).
+"""
+
+from datetime import datetime
+from typing import Annotated, Literal
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel, Field
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.auth import get_current_user
+from core.database import get_session
+from models.user import User
+from services.queue_overview import (
+    build_overview,
+    fetch_failed_items,
+    retry_failed,
+    skip_failed,
+)
+
+# Router for the processing-board endpoints declared below; the module docstring places
+# them under /api/queue/* (the prefix itself is presumably applied where this is mounted).
+router = APIRouter()
+
+
+class CurrentItem(BaseModel):
+    """A document a machine is processing right now (at most 2 per machine)."""
+    document_id: int
+    title: str
+    # Name of the pipeline stage the document is currently in.
+    stage: str
+
+
+class MachineCard(BaseModel):
+    """Machine card — totals of the stages attributed to the machine, completion
+    figures (summarize is split out as its own pool) and a state."""
+    # Closed set of machine identifiers.
+    key: Literal["gpu", "macmini", "macbook"]
+    # Display label; per the module docstring raw model names must not be exposed.
+    label: str
+    state: Literal["active", "deferred", "idle"]
+    # Names of the stages attributed to this machine.
+    stages: list[str]
+    pending: int
+    processing: int
+    failed: int
+    # Completion counts — presumably over the last hour / today (TODO confirm window).
+    done_1h: int
+    done_today: int
+    deferred_pending: int
+    # Documents currently being processed by this machine.
+    current: list[CurrentItem]
+
+
+class SummarizeEta(BaseModel):
+    """ETA for the summarize pool — eta_minutes is produced only when done > inflow."""
+    pending: int
+    done_rate_1h: int
+    inflow_rate_1h: int
+    # None when no ETA can be produced (see the class docstring).
+    eta_minutes: int | None
+
+
+class MachineDone(BaseModel):
+    """Summarize completion figures for a single machine (used to show the split)."""
+    done_1h: int
+    done_today: int
+
+
+class SummarizeByMachine(BaseModel):
+    """Per-machine split of summarize-pool completions — makes the "Mac mini vs
+    MacBook" offload visible in the board lanes. Exposes values that
+    rows_to_summarize_split already computed (ds-board-merged A-1, no new
+    collection SQL)."""
+    macmini: MachineDone
+    macbook: MachineDone
+
+
+class TrendBucket(BaseModel):
+    """One bucket of the summarize 24h trend — hour is a KST "HH:00" label."""
+    hour: str
+    inflow: int
+    done: int
+
+
+class Totals(BaseModel):
+    """Totals across all stages."""
+    pending: int
+    processing: int
+    failed: int
+
+
+class StageRow(BaseModel):
+    """Per-stage status row — for the flow nodes / detail panel.
+
+    done_1h/created_1h = processing rate and inflow rate (material the FE uses
+    for the "inflow dominates" judgement and for ETA; added in
+    ds-board-engines-1 — exposes values the collection SQL already had).
+    """
+    stage: str
+    pending: int
+    processing: int
+    failed: int
+    done_1h: int
+    created_1h: int
+    done_today: int
+    # Age in seconds of the oldest pending row — presumably None when nothing
+    # is pending (TODO confirm against services/queue_overview.py).
+    oldest_pending_age_sec: int | None
+
+
+class QueueOverviewResponse(BaseModel):
+    # Response body of GET /overview: machine cards, per-stage rows, summarize ETA,
+    # per-machine summarize split, 24h trend buckets and overall totals.
+    machines: list[MachineCard]
+    stages: list[StageRow]
+    summarize_eta: SummarizeEta
+    summarize_by_machine: SummarizeByMachine
+    trend_24h: list[TrendBucket]
+    totals: Totals
+
+
+class FailedItem(BaseModel):
+    """A permanently failed row — the display unit of the failure drawer."""
+    # Identifier of the failed queue row (the value sent back to /retry and /skip).
+    id: int
+    stage: str
+    document_id: int
+    title: str
+    attempts: int
+    max_attempts: int
+    error_message: str | None
+    failed_at: datetime | None
+
+
+class FailedListResponse(BaseModel):
+    # Response body of GET /failed: the failed rows plus their count.
+    items: list[FailedItem]
+    total: int
+
+
+class QueueActionRequest(BaseModel):
+    """Targets of a retry/skip — a list of failed-row ids (sent by the FE after grouping)."""
+    # Between 1 and 300 ids per request.
+    ids: list[int] = Field(min_length=1, max_length=300)
+
+
+class RetryResponse(BaseModel):
+    # Outcome counts of POST /retry.
+    requested: int
+    retried: int
+    not_retried: int
+
+
+class SkipResponse(BaseModel):
+    # Outcome counts of POST /skip.
+    requested: int
+    skipped: int
+    not_skipped: int
+
+
+@router.get("/overview", response_model=QueueOverviewResponse)
+async def get_queue_overview(
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Machine-centric processing board plus summarize ETA aggregation (computed live, no new tables).
+
+    Requires an authenticated user. The aggregation itself is delegated to
+    build_overview(session); its result is validated into QueueOverviewResponse.
+    """
+    return QueueOverviewResponse.model_validate(await build_overview(session))
+
+
+@router.get("/failed", response_model=FailedListResponse)
+async def get_failed_items(
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """List permanently failed rows (with document titles, at most 300 rows).
+
+    Requires an authenticated user. Rows come from fetch_failed_items(session).
+    """
+    items = await fetch_failed_items(session)
+    return FailedListResponse(
+        items=[FailedItem.model_validate(i) for i in items],
+        # total counts the rows returned here, not necessarily every failed row.
+        total=len(items),
+    )
+
+
+@router.post("/retry", response_model=RetryResponse)
+async def retry_failed_items(
+    body: QueueActionRequest,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Retry failed rows — resets attempts and returns the rows to pending.
+
+    not_retried = rows that collide with an active row of the same
+    (document, stage) (uq_queue_active) or rows that are no longer failed
+    (duplicate clicks, ...) — they are left untouched and only counted.
+
+    Requires an authenticated user. The work is delegated to
+    retry_failed(session, body.ids).
+    """
+    return RetryResponse.model_validate(await retry_failed(session, body.ids))
+
+
+@router.post("/skip", response_model=SkipResponse)
+async def skip_failed_items(
+    body: QueueActionRequest,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Skip failed rows — marks them completed (payload.skipped_by_user) with no follow-up chaining.
+
+    Requires an authenticated user. The work is delegated to
+    skip_failed(session, body.ids).
+    """
+    return SkipResponse.model_validate(await skip_failed(session, body.ids))
@@ -12,6 +12,7 @@
 import asyncio
 import hmac
 import time
+from datetime import date
 from typing import Annotated, Literal

 from fastapi import APIRouter, BackgroundTasks, Depends, Header, Query
@@ -31,6 +32,8 @@ from services.search.fusion_service import DEFAULT_FUSION
 from services.search.grounding_check import check as grounding_check
 from services.search.refusal_gate import RefusalDecision, decide as refusal_decide
 from services.search import query_rewriter
+from services.search.retrieval_service import AxisFilter
+from services.search.result_decorate import compute_facets, decorate_version_status
 from services.search.search_pipeline import PipelineResult, run_search
 from services.search.synthesis_service import SynthesisResult, synthesize
 from services.search.verifier_service import VerifierResult, verify
@@ -70,6 +73,14 @@ class SearchResult(BaseModel):
    # PR-RAG-Time-1: freshness decay 디버그 메타. apply_freshness_decay 가 채움.
    # 비적용 row 도 채워짐(freshness_policy=None). base_score 는 항상 보존.
    freshness_debug: dict | None = None
+    # 안전 자료실 C-1: 분류 축 메타 (3 leg SELECT 에서 채움 — additive, ranking 무관).
+    # D-1 UI 결과 카드 유형별 렌더 + 해외 법령(B-5) 가동 시 국가 무표지 혼재 차단의 선행 조건.
+    material_type: str | None = None
+    jurisdiction: str | None = None
+    published_date: date | None = None
+    # 안전 자료실 C-1 후속: 법령 버전 상태(legal_meta.version_status) — wrapper 1회 decorate.
+    # law 결과만 채워짐(legal_meta 위성), 그 외/무매핑 law = None. D-1 버전 뱃지 선행.
+    version_status: str | None = None


 # ─── Phase 0.4: 디버그 응답 스키마 ─────────────────────────
@@ -101,6 +112,9 @@ class SearchResponse(BaseModel):
    query: str
    mode: str
    debug: SearchDebug | None = None
+    # 안전 자료실 C-1 후속: facets=true 일 때만 채워짐(미요청=None, byte 불변).
+    # top-K 결과 내 분류 축 분포 라벨 {axis: {label: count}}.
+    facets: dict[str, dict[str, int]] | None = None


 def _to_debug_candidates(rows: list[SearchResult], n: int = 20) -> list[DebugCandidate]:
@@ -205,9 +219,23 @@ async def search(
            "분리용. production 검색에는 사용 금지 (latency 큼)."
        ),
    ),
+    material_type: str | None = Query(
+        None, description="안전 자료실 C-1: 자료유형 필터 CSV (law,paper,incident,...). material_type = ANY"),
+    jurisdiction: str | None = Query(
+        None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"),
+    year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"),
+    year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"),
+    facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"),
 ):
    """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)"""
    try:
+        axis = AxisFilter(
+            material_types=[m.strip() for m in material_type.split(",") if m.strip()]
+            if material_type else None,
+            jurisdiction=jurisdiction,
+            year_from=year_from,
+            year_to=year_to,
+        )
        pr = await run_search(
            session,
            q,
@@ -223,6 +251,7 @@ async def search(
            rewrite_backend=rewrite_backend,
            corpus_variant=corpus_variant,
            exact_knn=exact_knn,
+            axis=axis,
        )
    except ValueError as e:
        # _resolve_backend / _resolve_reranker / _resolve_rewrite_backend / _resolve_corpus_variant unknown slug → HTTP 400
@@ -313,12 +342,17 @@ async def search(

    debug_obj = _build_search_debug(pr) if debug else None

+    # 안전 자료실 C-1 후속 — wrapper decoration (검색 코어 무접촉, ranking 무관)
+    await decorate_version_status(session, pr.results)   # 법령 결과에 version_status
+    facets_obj = compute_facets(pr.results) if facets else None
+
    return SearchResponse(
        results=pr.results,
        total=len(pr.results),
        query=q,
        mode=pr.mode,
        debug=debug_obj,
+        facets=facets_obj,
    )


@@ -98,6 +98,10 @@ class AIConfig(BaseModel):
    classifier: AIModelConfig | None = None
    # Phase 3.5b: semantic verifier (optional — 없으면 grounding-only). PR #20 이후 Mac mini 26B MLX endpoint (initial = exaone3.5).
    verifier: AIModelConfig | None = None
+    # ds-macbook-offload-1: 심층 전용 슬롯 (optional). 맥북 M5 Max Qwen3.6-27B — llm-router :8890
+    # 경유(model=qwen-macbook alias, wake preflight 재사용). 부재 시 deep_summary 는 기존
+    # primary(맥미니 26B) 경로 그대로 = 기능 미활성. 명시 opt-in — silent fallback 없음.
+    deep: AIModelConfig | None = None
    # Legacy: vision 슬롯 (현재 사용처 0 — Document Server 는 OCR/STT 별도 서비스).
    # 제거 진행 중이므로 optional 로 관대한 로딩 유지.
    vision: AIModelConfig | None = None
@@ -154,6 +158,17 @@ class Settings(BaseModel):
    # 업로드 한도 (authoritative policy)
    upload: UploadConfig = UploadConfig()

+    # 생성 LLM 홀드 (2026-06-11): config.yaml pipeline.held_stages 에 든 이름의
+    # 컨슈머/워커는 claim 자체를 하지 않는다 (attempts 미소모, pending 적체 = 의도).
+    # 유효 키 = 큐 stage 명(classify/summarize/deep_summary) + cron/컨슈머 키(digest,
+    # briefing, study_explanation, study_session_analysis, study_memo_card).
+    # 빈 리스트 = 무동작 (기존 동작 그대로).
+    pipeline_held_stages: list[str] = []
+
+    # mlx gate 동시 실행 상한 (2026-06-12, config.yaml pipeline.mlx_gate_concurrency).
+    # 1 = 구 single-inference 동작. 2 = continuous batching 활용 (llm_gate docstring 참조).
+    mlx_gate_concurrency: int = 1
+
    # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
    # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거.
    study_explanation_enabled: bool = True
@@ -218,6 +233,7 @@ def load_settings() -> Settings:
                verifier=(
                    AIModelConfig(**models["verifier"]) if "verifier" in models else None
                ),
+                deep=(AIModelConfig(**models["deep"]) if "deep" in models else None),
                deep_summary_backlog=DeepSummaryBacklogConfig(
                    **ai_raw.get("deep_summary_backlog", {})
                ),
@@ -239,6 +255,21 @@ def load_settings() -> Settings:
            )
        )

+    pipeline_held_stages: list[str] = []
+    mlx_gate_concurrency = 1
+    if config_path.exists() and raw and "pipeline" in raw:
+        held_raw = (raw.get("pipeline") or {}).get("held_stages") or []
+        # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용.
+        if not isinstance(held_raw, (list, tuple)):
+            held_raw = [held_raw]
+        pipeline_held_stages = [str(s) for s in held_raw]
+        try:
+            mlx_gate_concurrency = max(
+                1, int((raw.get("pipeline") or {}).get("mlx_gate_concurrency", 1))
+            )
+        except (TypeError, ValueError):
+            mlx_gate_concurrency = 1
+
    taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
    document_types = raw.get("document_types", []) if config_path.exists() and raw else []
    upload_cfg = (
@@ -267,6 +298,8 @@ def load_settings() -> Settings:
        study_explanation_enabled=study_explanation_enabled,
        study_card_extract_enabled=study_card_extract_enabled,
        internal_worker_token=internal_worker_token,
+        pipeline_held_stages=pipeline_held_stages,
+        mlx_gate_concurrency=mlx_gate_concurrency,
    )


@@ -0,0 +1,346 @@
+"""크롤링 politeness 코어 (A-4, plan crawl-24x7-1)
+
+개인 아카이빙 권장치를 그대로 박은 공용 fetch 계층:
+- per-domain 동시성 1 (asyncio.Lock) + 같은 도메인 연속 요청 5–15초 지연 + jitter
+- robots.txt 존중 (urllib.robotparser, 24h 캐시) — 비로그인 공개 크롤링 한정.
+  로그인 세션 fetch (B-3) 는 사용자 행위 성격이라 robots 대신 사람 속도가 기준.
+- 정직 식별 UA + 연락처 (익명 크롤링 트랙. 로그인 세션은 브라우저 UA 유지 — B-3)
+- 429 = Retry-After 존중 / 5xx = 재시도 가능 / 403 = 차단 신호 (호출측 circuit 연동)
+
+도메인별 마지막 요청 시각 등 rate 상태는 in-process (영속 워터마크는 DB — news_sources).
+SSRF 차단은 core.url_validator.validate_feed_url 재사용 (redirect target 재검증 포함).
+"""
+
+import asyncio
+import base64
+import random
+import time
+import urllib.robotparser
+from urllib.parse import urljoin, urlparse
+
+import httpx
+
+from core.url_validator import validate_feed_url
+from core.utils import setup_logger
+
+# bare getLogger 는 root(WARNING) 상속이라 INFO 대기/차단 로그가 드랍됨 — 타 워커와 동일 설정
+logger = setup_logger("crawl_politeness")
+
+# 정직 식별 UA + 연락처 — 차단 전 연락 통로 (A-4)
+CRAWL_UA = "HyungiPKM-Archiver/1.0 (personal archive; +mailto:hyun49196@gmail.com)"
+
+# 같은 도메인 연속 요청 간격 (초) — 권장치 5–15s + jitter
+_DOMAIN_DELAY_MIN = 5.0
+_DOMAIN_DELAY_MAX = 15.0
+
+# 구독 세션(브라우저) fetch 간격 — 사람 속도 (B-3 ④: 기사 간 수십 초)
+_AUTH_DELAY_MIN = 30.0
+_AUTH_DELAY_MAX = 60.0
+
+# B-3 Playwright 격리 컨테이너 (internal-only, compose DNS)
+_FETCHER_URL = "http://playwright-fetcher:3400"
+_FETCHER_TIMEOUT = 120.0  # 브라우저 기동 + 네비게이션 + settle 포함
+
+# 안티봇 챌린지 페이지 식별 마커 (DataDome/Cloudflare 등) — 좁게 유지(오탐 회피).
+# 실측: 르몽드 기사 = DataDome "Client Challenge" + "Entrez les caractères" CAPTCHA.
+_CHALLENGE_MARKERS = (
+    "Client Challenge",
+    "Entrez les caractères affichés",
+    "Checking your browser before",
+    "captcha-delivery.com",
+    "geo.captcha-delivery",
+    # CF JS 챌린지 인터스티셜의 스크립트 도메인 (aiche.org 실측 2026-06-11) —
+    # fetcher 의 챌린지 대기를 끝까지 통과 못 한 최종 HTML 만 여기 걸린다.
+    "challenges.cloudflare.com",
+)
+
+_ROBOTS_CACHE_TTL = 24 * 3600  # 24h
+_MAX_PAGE_BYTES = 5 * 1024 * 1024  # 피드 fetch 와 동일 5MB cap
+_PAGE_TIMEOUT = 20.0
+_MAX_REDIRECTS = 3
+
+_HTML_CONTENT_TYPES = ("text/html", "application/xhtml+xml")
+
+
+class CrawlFetchError(Exception):
+    """Transient failure (5xx / timeout / network) — eligible for queue retry."""
+
+
+class CrawlBlocked(Exception):
+    """Block signal (403 / 429 / robots disallow) — handle with backoff/circuit rather than retry."""
+
+
+class CrawlSkip(Exception):
+    """Permanently out of scope (non-HTML / oversized / SSRF-blocked / 4xx) — handled by downgrading."""
+
+
+# 도메인별 직렬화 상태 (in-process)
+_domain_locks: dict[str, asyncio.Lock] = {}
+_domain_last_request: dict[str, float] = {}
+# host → (cached_at, RobotFileParser | None).  None = robots 없음/4xx (전부 허용)
+_robots_cache: dict[str, tuple[float, urllib.robotparser.RobotFileParser | None]] = {}
+
+
+def _domain_of(url: str) -> str:
+    return (urlparse(url).hostname or "").lower()  # "" when the URL has no hostname
+
+
+def _get_lock(domain: str) -> asyncio.Lock:
+    # One lock per domain, created on first use and reused for every later call
+    # (the `or` fallback only runs when the domain has no lock yet).
+    return _domain_locks.get(domain) or _domain_locks.setdefault(domain, asyncio.Lock())
+
+
+async def _respect_domain_rate(
+    domain: str,
+    delay_min: float = _DOMAIN_DELAY_MIN,
+    delay_max: float = _DOMAIN_DELAY_MAX,
+) -> None:
+    """Sleep until a jittered delay has passed since this domain's previous request."""
+    previous = _domain_last_request.get(domain)
+    if previous is None:
+        return  # no recorded request for this domain yet: nothing to wait for
+    remaining = previous + random.uniform(delay_min, delay_max) - time.monotonic()
+    if remaining > 0:
+        # Never sleep silently — the log line keeps politeness behaviour observable.
+        logger.info("[politeness] %s %.1fs 대기", domain, remaining)
+        await asyncio.sleep(remaining)
+
+
+async def _fetch_robots(client: httpx.AsyncClient, scheme: str, host: str):
+    """Fetch robots.txt. 4xx/absent = allow all (None); 5xx or transport error = raise CrawlFetchError."""
+    robots_url = f"{scheme}://{host}/robots.txt"
+    try:
+        resp = await client.get(robots_url, headers={"User-Agent": CRAWL_UA})
+    except httpx.HTTPError as e:
+        raise CrawlFetchError(f"robots.txt 조회 실패: {host}: {e}") from e
+    if resp.status_code >= 500:
+        # 5xx leaves the site's intent unknown — treated as blocked for this cycle
+        raise CrawlFetchError(f"robots.txt 5xx: {host}: {resp.status_code}")
+    if resp.status_code >= 400:
+        return None  # no robots.txt = everything allowed
+    rp = urllib.robotparser.RobotFileParser()
+    rp.parse(resp.text.splitlines())  # NOTE(review): a 3xx body is parsed too when the client does not follow redirects
+    return rp
+
+
+async def _robots_allows(client: httpx.AsyncClient, url: str) -> bool:
+    # Cache is keyed by lowercase host; entries older than the TTL are refetched.
+    target = urlparse(url)
+    host = (target.hostname or "").lower()
+    entry = _robots_cache.get(host)
+    stale = entry is None or time.monotonic() - entry[0] > _ROBOTS_CACHE_TTL
+    if stale:
+        parser = await _fetch_robots(client, target.scheme or "https", host)
+        entry = _robots_cache[host] = (time.monotonic(), parser)
+    # A None parser records "no robots.txt" and permits every URL.
+    policy = entry[1]
+    return True if policy is None else policy.can_fetch(CRAWL_UA, url)
+
+
+async def fetch_page(
+    url: str, *, check_robots: bool = True,
+    content_types: tuple[str, ...] = _HTML_CONTENT_TYPES,
+) -> tuple[str, str]:
+    """Politely fetch one public page; returns (html_text, final_url).
+
+    - SSRF validation, redirect targets included (same double check as the news_collector feed fetch)
+    - per-domain concurrency of 1 plus a jittered 5–15s delay
+    - 429: log Retry-After, then CrawlBlocked / 403: CrawlBlocked / other 4xx: CrawlSkip
+    - 5xx/timeout/network error: CrawlFetchError (queue retry)
+    - content-type outside `content_types` / body over 5MB: CrawlSkip
+    """
+    try:
+        validate_feed_url(url)
+    except ValueError as e:
+        raise CrawlSkip(f"URL 검증 실패: {e}") from e
+
+    domain = _domain_of(url)
+    async with _get_lock(domain):
+        await _respect_domain_rate(domain)
+        try:
+            async with httpx.AsyncClient(
+                timeout=_PAGE_TIMEOUT, follow_redirects=False,
+                headers={"User-Agent": CRAWL_UA},
+            ) as client:
+                if check_robots and not await _robots_allows(client, url):
+                    raise CrawlBlocked(f"robots.txt disallow: {url}")
+
+                resp = await client.get(url)
+                redirects = 0
+                # has_redirect_location is true only for real redirects carrying a Location header (httpx's
+                # is_redirect covers every 3xx, so e.g. 304 would be mistaken for one — same pitfall as news_collector)
+                while resp.has_redirect_location and redirects < _MAX_REDIRECTS:
+                    location = urljoin(str(resp.request.url), resp.headers["location"])
+                    try:
+                        validate_feed_url(location)
+                    except ValueError as e:
+                        raise CrawlSkip(f"redirect target 차단: {e}") from e
+                    # The redirect hop is issued immediately under the held lock, with no extra delay
+                    resp = await client.get(location)
+                    redirects += 1
+                if resp.has_redirect_location:
+                    raise CrawlSkip(f"redirect {_MAX_REDIRECTS}회 초과: {url}")
+        except httpx.TimeoutException as e:
+            raise CrawlFetchError(f"timeout: {url}") from e
+        except httpx.HTTPError as e:
+            raise CrawlFetchError(f"네트워크 오류: {url}: {e}") from e
+        finally:
+            _domain_last_request[domain] = time.monotonic()  # recorded on success and on failure
+
+    if resp.status_code == 429:
+        retry_after = resp.headers.get("retry-after", "")
+        logger.warning("[politeness] 429 %s (Retry-After=%s)", domain, retry_after or "-")
+        raise CrawlBlocked(f"429 rate limited: {url} (Retry-After={retry_after or '-'})")
+    if resp.status_code == 403:
+        raise CrawlBlocked(f"403 forbidden: {url}")
+    if resp.status_code >= 500:
+        raise CrawlFetchError(f"{resp.status_code}: {url}")
+    if resp.status_code >= 400:
+        raise CrawlSkip(f"{resp.status_code}: {url}")
+
+    ct = resp.headers.get("content-type", "").lower()
+    if ct and not any(t in ct for t in content_types):  # a missing content-type header is let through
+        raise CrawlSkip(f"비허용 content-type: {ct}: {url}")
+    if len(resp.content) > _MAX_PAGE_BYTES:
+        raise CrawlSkip(f"크기 초과: {len(resp.content)} bytes: {url}")
+
+    return resp.text, str(resp.request.url)
+
+
+# ── B-3 구독 세션 fetch (Playwright 격리 컨테이너 경유) ──────────────────────
+
+async def fetch_page_via_browser(url: str, profile: str | None) -> tuple[str, str]:
+    """Fetch one page through a browser — delegated to playwright-fetcher at human pace (30–60s).
+
+    profile=None means an anonymous context (cycle 3 — only for public sites whose WAF
+    answers plain httpx with 403 regardless of UA; observed on CCPS aiche.org). A value selects
+    a B-3 subscription session. Returns (html_text, final_url). robots.txt is not consulted:
+    subscription fetches count as user actions, and the anonymous path relies on low frequency
+    (1–2 times a month) plus human pace. Raises the same exception types as fetch_page.
+    """
+    try:
+        validate_feed_url(url)
+    except ValueError as e:
+        raise CrawlSkip(f"URL 검증 실패: {e}") from e
+
+    payload = {"url": url}
+    if profile:
+        payload["profile"] = profile
+
+    domain = _domain_of(url)
+    async with _get_lock(domain):
+        await _respect_domain_rate(domain, _AUTH_DELAY_MIN, _AUTH_DELAY_MAX)
+        try:
+            async with httpx.AsyncClient(timeout=_FETCHER_TIMEOUT) as client:
+                resp = await client.post(f"{_FETCHER_URL}/fetch", json=payload)
+        except httpx.TimeoutException as e:
+            raise CrawlFetchError(f"browser fetch timeout: {url}") from e
+        except httpx.HTTPError as e:
+            raise CrawlFetchError(f"playwright-fetcher 연결 오류: {e}") from e
+        finally:
+            _domain_last_request[domain] = time.monotonic()
+
+    if resp.status_code == 503:
+        # storage_state missing — waiting for a manually captured session (caller degrades; no retry loop)
+        raise CrawlBlocked(f"세션 프로필 부재: {profile}")
+    if resp.status_code != 200:
+        raise CrawlFetchError(f"playwright-fetcher {resp.status_code}: {url}")
+    data = resp.json()
+    html_text = data.get("html", "")
+    if len(html_text.encode("utf-8", errors="replace")) > _MAX_PAGE_BYTES:
+        raise CrawlSkip(f"크기 초과 (browser): {url}")
+    # Detect anti-bot challenge pages (DataDome etc.) — stops a short challenge HTML that passes the
+    # body-length gate (200 chars) from being silently promoted to article body. Headless detection makes
+    # retrying pointless → CrawlBlocked (= degrade, keep the RSS summary). Markers stay conservatively narrow.
+    if any(m in html_text for m in _CHALLENGE_MARKERS):
+        raise CrawlBlocked(f"안티봇 챌린지 페이지(headless 차단): {url}")
+    return html_text, data.get("final_url", url)
+
+
+_MAX_DOWNLOAD_BYTES = 60 * 1024 * 1024  # fetcher MAX_DOWNLOAD_BYTES 와 동률
+
+
+async def download_via_browser(
+    url: str, *, referer: str | None = None, profile: str | None = None
+) -> tuple[bytes, str]:
+    """Download one binary (PDF) via the fetcher's /download; returns (content, content_type).
+
+    referer = listing page visited first to obtain the WAF challenge cookie (CCPS Beacon pattern).
+    Inner status: 403/429 = CrawlBlocked, other 4xx = CrawlSkip, any other non-200 = CrawlFetchError
+    (same exception vocabulary as fetch_page, so callers can reuse their branching).
+    """
+    try:
+        validate_feed_url(url)
+    except ValueError as e:
+        raise CrawlSkip(f"URL 검증 실패: {e}") from e
+
+    payload: dict = {"url": url}
+    if referer:
+        payload["referer"] = referer
+    if profile:
+        payload["profile"] = profile
+
+    domain = _domain_of(url)
+    async with _get_lock(domain):
+        await _respect_domain_rate(domain, _AUTH_DELAY_MIN, _AUTH_DELAY_MAX)
+        try:
+            async with httpx.AsyncClient(timeout=_FETCHER_TIMEOUT) as client:
+                resp = await client.post(f"{_FETCHER_URL}/download", json=payload)
+        except httpx.TimeoutException as e:
+            raise CrawlFetchError(f"browser download timeout: {url}") from e
+        except httpx.HTTPError as e:
+            raise CrawlFetchError(f"playwright-fetcher 연결 오류: {e}") from e
+        finally:
+            _domain_last_request[domain] = time.monotonic()
+
+    if resp.status_code == 503:
+        raise CrawlBlocked(f"세션 프로필 부재: {profile}")
+    if resp.status_code != 200:
+        raise CrawlFetchError(f"playwright-fetcher {resp.status_code}: {url}")
+    data = resp.json()
+    inner = int(data.get("status", 0))  # status carried inside the fetcher's JSON body; 0 when absent
+    if inner in (403, 429):
+        raise CrawlBlocked(f"{inner} (browser download): {url}")
+    if 400 <= inner < 500:
+        raise CrawlSkip(f"{inner} (browser download): {url}")
+    if inner != 200:
+        raise CrawlFetchError(f"{inner} (browser download): {url}")
+    content = base64.b64decode(data.get("body_b64", ""))
+    if len(content) > _MAX_DOWNLOAD_BYTES:
+        raise CrawlSkip(f"크기 초과 (browser download): {url}")
+    return content, data.get("content_type", "")
+
+
+async def probe_session(
+    profile: str, probe_url: str, min_body_chars: int, paywall_markers: list[str]
+) -> dict:
+    """Content-based session probe (B-3 ②) — {'ok': bool, 'reason': str|None, 'body_chars': int}.
+
+    Failures are returned as values, not raised — the caller records them in source_health and degrades.
+    Same per-domain lock + human-pace delay as real fetches. NOTE(review): probe_url skips validate_feed_url.
+    """
+    domain = _domain_of(probe_url)
+    async with _get_lock(domain):
+        await _respect_domain_rate(domain, _AUTH_DELAY_MIN, _AUTH_DELAY_MAX)
+        try:
+            async with httpx.AsyncClient(timeout=_FETCHER_TIMEOUT) as client:
+                resp = await client.post(
+                    f"{_FETCHER_URL}/probe",
+                    json={
+                        "profile": profile,
+                        "probe_url": probe_url,
+                        "min_body_chars": min_body_chars,
+                        "paywall_markers": paywall_markers,
+                    },
+                )
+        except httpx.HTTPError as e:
+            return {"ok": False, "reason": f"fetcher 연결 오류: {e}", "body_chars": 0}
+        finally:
+            _domain_last_request[domain] = time.monotonic()
+
+    if resp.status_code == 503:
+        return {"ok": False, "reason": f"세션 프로필 부재: {profile}", "body_chars": 0}
+    if resp.status_code != 200:
+        return {"ok": False, "reason": f"fetcher {resp.status_code}", "body_chars": 0}
+    return resp.json()  # on HTTP 200 the fetcher's JSON body is returned as-is
@@ -106,33 +106,3 @@ END:VCALENDAR"""
    except Exception as e:
        logging.getLogger("caldav").error(f"CalDAV VTODO 생성 실패: {e}")
        return None
-
-
-# ─── SMTP 헬퍼 ───
-
-
-def send_smtp_email(
-    host: str,
-    port: int,
-    username: str,
-    password: str,
-    subject: str,
-    body: str,
-    to_addr: str | None = None,
-):
-    """Synology MailPlus SMTP로 이메일 발송"""
-    import smtplib
-    from email.mime.text import MIMEText
-
-    to_addr = to_addr or username
-    msg = MIMEText(body, "plain", "utf-8")
-    msg["Subject"] = subject
-    msg["From"] = username
-    msg["To"] = to_addr
-
-    try:
-        with smtplib.SMTP_SSL(host, port, timeout=30) as server:
-            server.login(username, password)
-            server.send_message(msg)
-    except Exception as e:
-        logging.getLogger("smtp").error(f"SMTP 발송 실패: {e}")
@@ -11,11 +11,119 @@ endpoint 를 못 부른다(silent fallback 0, rules no-silent-fallback).
  - _request()       → endpoint 에 anthropic.com 있으면 raise(primary 오결선 방어, 이중보증)
 call_primary / call_triage / embed / rerank 는 그대로(내부 inference·임베딩 허용).
 egress 워커·시스템 경로는 기존 AIClient 유지 — fallback 은 시스템만, 이드만 박탈(분리).
+
+eid-chat (D-5): 이드 채팅 SSE 스트리밍도 이 클래스의 call_stream() 한 곳 — RouterBackend
+직접 호출 금지, mode 어휘는 _CHAT_ALIAS 닫힌 매핑(daily/deep)만, 미지 mode = EidEgressBlocked.
 """

 from __future__ import annotations

+import asyncio
+import json
+import re
+from collections.abc import AsyncIterator
+from contextlib import AsyncExitStack
+
+import httpx
+
 from ai.client import AIClient
+from services.llm.backends import (
+    MAC_MINI_DEFAULT,
+    BackendUnavailable,
+    _router_url,  # router URL 단일 출처 재사용 (settings → env LLM_ROUTER_URL → MVP default)
+)
+from services.search.llm_gate import Priority, acquire_mlx_gate
+
+# 이드 채팅 mode → router alias 닫힌 매핑 (D-2). 클라는 mode 만 보냄 — claude-cloud/auto 금지.
+# 2026-06-11 맥북 백지화: deep 도 mac-mini-default (맥미니 Qwen 27B 단일 호스트).
+# mode 구분은 유지 — deep = ReAct 자동검색 경로(모델이 아니라 동작이 다름).
+# 게이트는 alias==MAC_MINI_DEFAULT 조건이라 deep 도 자동으로 mlx gate 적용
+# (llm_gate "예외 없이 gate 획득 필수" invariant 충족 — 구 무게이트는 맥북 예외였음).
+_CHAT_ALIAS: dict[str, str] = {
+    "daily": MAC_MINI_DEFAULT,  # router tier_b → Mac mini :8801
+    "deep": MAC_MINI_DEFAULT,   # 맥북 폐기로 동일 upstream — ReAct 검색 모드 구분만 유지
+}
+
+# read 는 per-chunk 적용이라 MacBook wake(24s)+토큰 생성 간격 커버. connect 는 내부 router 라 짧게.
+_STREAM_TIMEOUT = httpx.Timeout(connect=5.0, read=120.0, write=30.0, pool=5.0)
+
+# 스트림 중계 전체(업스트림 진입~종료) wall-clock 상한. per-chunk read timeout 만으로는
+# 토큰이 계속 흐르는 한 무한 점유 가능 → daily 는 mlx gate 를 물고 있어 deadline 필수.
+# deep 도 동일 적용(단순·일관). 정상 스트림(max_tokens 2048, ~90tps ≈ 23s)은 여유 통과.
+_STREAM_DEADLINE_S = 300.0
+
+# error_reason allowlist — 이 밖(대문자/공백/JSON 직렬화 파편)은 일반화해 비노출
+_REASON_ALLOWED = re.compile(r"[a-z0-9_]{1,64}")
+
+# 스트림 시작 전 transport 계열 실패 → BackendUnavailable 매핑 대상 (RouterBackend._post 와 동일 목록)
+_TRANSPORT_ERRORS = (
+    httpx.ConnectError,
+    httpx.ConnectTimeout,
+    httpx.ReadTimeout,
+    httpx.PoolTimeout,
+    httpx.WriteTimeout,
+    httpx.RemoteProtocolError,
+)
+
+
+def _stream_error_reason(status_code: int, body: bytes) -> str:
+    """Derive an error_reason from a 4xx/5xx response received before streaming began.
+
+    The vocabulary matches /api/search/ask (RouterBackend._post): the router's error.type /
+    error.error_reason (macbook_unavailable / warming / editor_busy / upstream_cold /
+    provider_not_configured, ...) wins, then a top-level error_reason, and otherwise a
+    status-derived value: router_503 / upstream_502 / router_http_<status>.
+
+    The result must fully match [a-z0-9_]{1,64}; anything else (uppercase, spaces, serialized
+    dict fragments) is generalized to upstream_502 (for 502) or router_error and never exposed.
+    """
+    try:
+        parsed = json.loads(body.decode("utf-8", errors="replace"))
+    except Exception:
+        parsed = {}
+    top = parsed if isinstance(parsed, dict) else {}
+    nested = top.get("error", {})
+    # Candidates in priority order: error.type, error.error_reason, top-level error_reason.
+    candidates: list[object] = []
+    if isinstance(nested, dict):
+        candidates.extend((nested.get("type"), nested.get("error_reason")))
+    candidates.append(top.get("error_reason"))
+    picked = next((str(item) for item in candidates if item), None)
+    if picked is None:
+        # The body named nothing usable — fall back to a status-derived reason.
+        by_status = {502: "upstream_502", 503: "router_503"}
+        picked = by_status.get(status_code, f"router_http_{status_code}")
+    # Allowlist gate: anything outside [a-z0-9_]{1,64} is generalized before exposure.
+    if _REASON_ALLOWED.fullmatch(picked) is None:
+        if status_code == 502:
+            return "upstream_502"
+        return "router_error"
+    return picked
+
+
+def _rewrite_sse_line(line: bytes, mode: str) -> bytes:
+    """Sanitize one SSE line: set the data JSON's model to `mode`, drop usage.
+
+    Fixture observation: the 27B chunk's model field exposed an absolute
+    filesystem path; usage carries machine telemetry (tps/peak_memory). The
+    space after "data:" is optional in SSE, so both spellings are rewritten.
+    [DONE], non-data lines (blank included), unparsable or non-object payloads
+    are returned unchanged, which keeps the SSE framing intact.
+    """
+    if not line.startswith(b"data:"):
+        return line
+    payload = line[len(b"data:"):].removeprefix(b" ")
+    if payload.strip() == b"[DONE]":
+        return line
+    try:
+        obj = json.loads(payload)
+    except Exception:
+        return line
+    if not isinstance(obj, dict):
+        return line
+    obj["model"] = mode
+    obj.pop("usage", None)
+    return b"data: " + json.dumps(obj, ensure_ascii=False).encode("utf-8")


 class EidEgressBlocked(RuntimeError):
@@ -39,3 +147,91 @@ class EidAIClient(AIClient):
        if "anthropic.com" in endpoint:
            raise EidEgressBlocked(f"이드: 외부 endpoint 차단 ({endpoint}). 내부 inference 만.")
        return await super()._request(model_config, prompt, system=system)
+
+    async def call_stream(
+        self, mode: str, messages: list[dict], system: str
+    ) -> AsyncIterator[bytes]:
+        """Eid chat SSE stream — line-by-line relay of router /v1/chat/completions stream=true (D-5).
+
+        mode     : "daily" | "deep" — closed mapping in _CHAT_ALIAS. Unknown mode = EidEgressBlocked
+                   (Eid LLM calls are gated in this one class, invariant #5).
+        messages : user/assistant turns (no system role — the system text comes only via `system`).
+        system   : merged compose("eid_chat", ...) text, prepended to messages as the system role.
+
+        Failures before the stream starts (connection failure, 4xx/5xx other than 400) raise
+        BackendUnavailable (same reason vocabulary as ask). Router 400 = alias drift in the closed
+        mapping, i.e. a code bug → ValueError, fail-loud (mirrors the RouterBackend._post convention).
+        Once streaming, bytes are line-buffered and each line goes through _rewrite_sse_line (model
+        replaced by mode, usage dropped); framing is kept. AsyncExitStack closes response and client.
+
+        daily and deep both map to mac-mini-default (MacBook path scrapped 2026-06-11), so per the
+        llm_gate rule ("gate acquisition is mandatory, no exceptions") streaming runs inside
+        acquire_mlx_gate(FOREGROUND). The gate condition is keyed on the alias, so deep is covered
+        automatically (the old gate-free path dated from the separate MacBook endpoint).
+
+        The relay (entering upstream through end of stream) runs under asyncio.timeout(_STREAM_DEADLINE_S),
+        a wall-clock deadline entered after the gate — llm_gate contract "timeout goes inside the gate".
+        Exceeding it converges to BackendUnavailable(alias, "stream_deadline_exceeded").
+        """
+        alias = _CHAT_ALIAS.get(mode)
+        if alias is None:
+            raise EidEgressBlocked(
+                f"이드: 미지 chat mode {mode!r} — 닫힌 매핑(daily/deep) 외 호출 차단."
+            )
+        router_url = _router_url()
+        if "anthropic.com" in router_url:
+            # Mirrors the existing _request pattern — guards against external egress if the router URL is miswired
+            raise EidEgressBlocked(f"이드: 외부 endpoint 차단 ({router_url}). 내부 router 만.")
+        url = f"{router_url.rstrip('/')}/v1/chat/completions"
+        payload = {
+            "model": alias,
+            "messages": [{"role": "system", "content": system}] + messages,
+            "stream": True,
+            "max_tokens": 2048,
+            "temperature": 0.4,
+        }
+        async with AsyncExitStack() as stack:
+            if alias == MAC_MINI_DEFAULT:
+                await stack.enter_async_context(acquire_mlx_gate(Priority.FOREGROUND))
+            client = await stack.enter_async_context(httpx.AsyncClient(timeout=_STREAM_TIMEOUT))
+            try:
+                # Wall-clock deadline — entered *after* the gate is acquired (llm_gate: "timeout goes inside the gate")
+                async with asyncio.timeout(_STREAM_DEADLINE_S):
+                    try:
+                        resp = await stack.enter_async_context(
+                            client.stream("POST", url, json=payload)
+                        )
+                    except _TRANSPORT_ERRORS as exc:
+                        # Transport failure before the stream starts — reason vocabulary matches RouterBackend (router_*)
+                        raise BackendUnavailable(alias, f"router_{type(exc).__name__}") from exc
+                    if resp.status_code == 400:
+                        # 400 under a closed mapping = alias drift (code bug) — mirrors RouterBackend._post;
+                        # not BackendUnavailable (temporary unavailability) → fail loud
+                        body = await resp.aread()
+                        try:
+                            data = json.loads(body.decode("utf-8", errors="replace"))
+                        except Exception:
+                            data = {}
+                        raise ValueError(f"router rejected alias={alias!r} body={data!r}")
+                    if resp.status_code >= 400:
+                        body = await resp.aread()
+                        raise BackendUnavailable(
+                            alias, _stream_error_reason(resp.status_code, body)
+                        )
+                    buf = b""
+                    try:
+                        async for chunk in resp.aiter_bytes():
+                            buf += chunk
+                            # Line buffering — split on b"\n" across chunk boundaries, keep the remainder
+                            while (nl := buf.find(b"\n")) != -1:
+                                line, buf = buf[:nl], buf[nl + 1:]
+                                yield _rewrite_sse_line(line, mode) + b"\n"
+                    except _TRANSPORT_ERRORS as exc:
+                        # Interrupted mid-stream — chunks already yielded were sent; converge to the typed exception.
+                        raise BackendUnavailable(alias, f"router_{type(exc).__name__}") from exc
+                    if buf:
+                        # Flush the tail (a final line without newline — no \n is added that the source lacked)
+                        yield _rewrite_sse_line(buf, mode)
+            except TimeoutError as exc:
+                # asyncio.timeout expired — bounds how long the gate is held; converge to the typed exception
+                raise BackendUnavailable(alias, "stream_deadline_exceeded") from exc
@@ -50,6 +50,8 @@ _ROUTE: dict[str, dict] = {
    "react_ask":                  {"overlay": None, "variant": "full"},
    "study_subject_note":         {"overlay": None, "variant": "full"},
    "study_question_explanation": {"overlay": None, "variant": "full"},
+    # 이드 채팅 표면 (D-1 /api/eid/chat) — 자유-prose(base), persona ON (불변식 #3)
+    "eid_chat":                   {"overlay": None, "variant": "full"},
    # 미래 active eid 표면 — 기능 overlay (W3+ 에서 호출 배선)
    "study_diagnosis":            {"overlay": "study",    "variant": "full"},
    "document_brief":             {"overlay": "document", "variant": "full"},
@@ -113,6 +115,17 @@ def is_composed_surface(surface: str) -> bool:
    return surface in _ROUTE


+def rules_present() -> bool:
+    """Whether rules.md exists — input for the chat surface's (D-6) fail-closed decision.
+
+    The existing _rules() degraded-banner convention (other surfaces, fail-loud but proceed) is left
+    as is — this only answers "should we refuse to proceed"; enforcement belongs to the caller (/api/eid/chat).
+    It checks the file on every call instead of using the lru_cache'd _read — the D-6 gate must be a live
+    decision (a frozen cache would never reflect rules.md disappearing or being restored).
+    """
+    return (_SUBSTRATE_DIR / "rules.md").is_file()
+
+
 def compose(surface: str, task: str, *, variant: str | None = None,
            budget_chars: int | None = None) -> str:
    """persona → rules → overlay → task 단일 system 문자열 합성.
@@ -17,10 +17,12 @@ from api.digest import router as digest_router
 from api.document_notes import router as document_notes_router
 from api.document_reads import router as document_reads_router
 from api.documents import router as documents_router
+from api.eid_chat import router as eid_chat_router
 from api.events import router as events_router
 from api.library import router as library_router
 from api.memos import router as memos_router
 from api.news import router as news_router
+from api.queue_overview import router as queue_overview_router
 from api.search import router as search_router
 from api.setup import router as setup_router
 from api.study_question_progress import router as study_question_progress_router
@@ -51,10 +53,15 @@ async def lifespan(app: FastAPI):
    from workers.dedup_reconcile import run as dedup_reconcile_run
    from workers.digest_worker import run as global_digest_run
    from workers.file_watcher import watch_inbox
-    from workers.law_monitor import run as law_monitor_run
    from workers.mailplus_archive import run as mailplus_run
+    from workers.statute_collector import run as statute_run
    from workers.news_collector import run as news_collector_run
-    from workers.queue_consumer import consume_queue, consume_markdown_queue
+    from workers.fulltext_worker import reconcile_unresolved as fulltext_reconcile_run
+    from workers.kosha_collector import run as kosha_collector_run
+    from workers.csb_collector import run as csb_collector_run
+    from workers.api_standards_collector import run as api_standards_run
+    from workers.ccps_collector import run as ccps_collector_run
+    from workers.queue_consumer import consume_queue, consume_fast_queue, consume_markdown_queue
    from workers.study_queue_consumer import consume_study_queue
    from workers.study_session_queue_consumer import consume_study_session_queue
    from workers.study_memo_card_jobs_consumer import consume_study_memo_card_queue
@@ -88,6 +95,9 @@ async def lifespan(app: FastAPI):
    # 대형 PDF split 변환(수십 분)이 메인 consume_queue 를 점유해 전 파이프라인을
    # stall 시키던 문제 제거. max_instances=1(기본) 으로 동시 marker 변환 2건은 방지.
    scheduler.add_job(consume_markdown_queue, "interval", minutes=1, id="markdown_consumer")
+    # 2026-06-12 fast-consumer split: embed/chunk(건당 <1s)를 LLM 사이클에서 분리 —
+    # classify(~190s×3)가 사이클을 점유해 벡터 적재가 굶던 구조 캡 해소 (markdown 선례).
+    scheduler.add_job(consume_fast_queue, "interval", minutes=1, id="fast_queue_consumer")
    scheduler.add_job(watch_inbox, "interval", minutes=5, id="file_watcher")
    scheduler.add_job(cleanup_orphan_uploads, "interval", minutes=10, id="upload_cleanup")
    # PR-4: study_questions 자동 임베딩 (status='none/failed/stale' 행을 batch=10 처리).
@@ -110,7 +120,9 @@ async def lifespan(app: FastAPI):
    # safety > law > manual 우선순위로 25건씩. 6720 레거시 → 야간당 ~150건 → 약 45일 소화.
    scheduler.add_job(tier_backfill_run, "interval", minutes=30, id="tier_backfill")
    # 일일 스케줄 (KST)
-    scheduler.add_job(law_monitor_run, CronTrigger(hour=7, timezone=KST), id="law_monitor")
+    # statute_collector = 구 law_monitor 대체 (safety-library-1 B-1 PR②) — poll→ingest→
+    # 생애주기 잡(버전 시리즈 승격·supersede·레거시 스윕·repeal) 통째 (R8-B1).
+    scheduler.add_job(statute_run, CronTrigger(hour=7, timezone=KST), id="statute_collector")
    scheduler.add_job(mailplus_run, CronTrigger(hour=7, timezone=KST), id="mailplus_morning")
    scheduler.add_job(mailplus_run, CronTrigger(hour=18, timezone=KST), id="mailplus_evening")
    scheduler.add_job(daily_digest_run, CronTrigger(hour=20, timezone=KST), id="daily_digest")
@@ -121,9 +133,20 @@ async def lifespan(app: FastAPI):
    # 이드 W3-2: 공부중 토픽 약점 derived 스냅샷 (nightly 04:30 KST, LLM 0). study_diagnosis 표면 source.
    scheduler.add_job(study_weakness_run, CronTrigger(hour=4, minute=30, timezone=KST), id="study_weakness")
    scheduler.add_job(news_collector_run, "interval", hours=6, id="news_collector")
+    # crawl-24x7 A-2 안전망: fulltext 영구 실패(3회 소진) 문서를 RSS 요약 기준으로
+    # 후속 enqueue (silent skip 누적 방지). 03:40 = dedup_reconcile(03:30) 직후 비충돌 슬롯.
+    scheduler.add_job(fulltext_reconcile_run, CronTrigger(hour=3, minute=40, timezone=KST), id="fulltext_reconcile")
    # plan ds-s1-backend-1 B-4: dedup 컬럼(duplicate_of/duplicate_count) 야간 절대 재계산.
    # soft-delete 잔여 드리프트 정리(멱등, 드리프트 없으면 no-op). cron 03:30 (다른 잡과 비충돌).
    scheduler.add_job(dedup_reconcile_run, CronTrigger(hour=3, minute=30, timezone=KST), id="dedup_reconcile")
+    # crawl-24x7 C-2: KOSHA 재해사례 diff + GUIDE 점진 백필 (daily, 새벽 잡들과 비충돌 슬롯).
+    scheduler.add_job(kosha_collector_run, CronTrigger(hour=6, minute=40, timezone=KST), id="kosha_collector")
+    # 사이클 3 C-2 잔여: CSB sitemap lastmod diff (weekly 월, cap 40 + 워터마크 점진 백필).
+    scheduler.add_job(csb_collector_run, CronTrigger(day_of_week="mon", hour=6, minute=50, timezone=KST), id="csb_collector")
+    # 사이클 3 C-4: API 표준 공지 목록 diff (monthly — 월 1~2건 공지 페이스).
+    scheduler.add_job(api_standards_run, CronTrigger(day=5, hour=7, minute=5, timezone=KST), id="api_standards_collector")
+    # 사이클 3 C-2 잔여: CCPS Beacon 월간 PDF (playwright 익명 경유 — WAF 차단 시 health 로 가시화).
+    scheduler.add_job(ccps_collector_run, CronTrigger(day=5, hour=7, minute=20, timezone=KST), id="ccps_collector")
    scheduler.start()

    # Phase 2.1 (async 구조): QueryAnalyzer prewarm.
@@ -158,12 +181,16 @@ app.include_router(documents_router, prefix="/api/documents", tags=["documents"]
 app.include_router(document_reads_router, prefix="/api/documents", tags=["document-reads"])
 app.include_router(document_notes_router, prefix="/api/documents", tags=["document-notes"])
 app.include_router(search_router, prefix="/api/search", tags=["search"])
+# 이드 채팅 표면 (D-1) — POST /api/eid/chat. SSE 스트리밍, EidAIClient.call_stream 봉쇄 경유.
+app.include_router(eid_chat_router, prefix="/api/eid", tags=["eid-chat"])

 app.include_router(memos_router, prefix="/api/memos", tags=["memos"])
 app.include_router(events_router, prefix="/api/events", tags=["events"])
 app.include_router(dashboard_router, prefix="/api/dashboard", tags=["dashboard"])
 app.include_router(library_router, prefix="/api/library", tags=["library"])
 app.include_router(news_router, prefix="/api/news", tags=["news"])
+# 처리 머신 보드 (plan ds-processing-ui-6an) — GET /api/queue/overview
+app.include_router(queue_overview_router, prefix="/api/queue", tags=["queue"])
 app.include_router(digest_router, prefix="/api/digest", tags=["digest"])
 app.include_router(briefing_router, prefix="/api/briefing", tags=["briefing"])
 app.include_router(audio_router, prefix="/api/audio", tags=["audio"])
@@ -14,6 +14,11 @@ from sqlalchemy.orm import Mapped, mapped_column

 from core.database import Base

+# FK("users.id") 해석에 users 테이블 메타데이터 필요 — fastapi 앱은 어차피 전 모델을
+# import 하지만, CLI 단독 실행(queue_drain 등)은 본 모듈만 끌어와 INSERT 시
+# "could not find table 'users'" 로 실패했다 (2026-06-12 drain 로그 실측). 명시 import.
+from models.user import User  # noqa: F401
+

 class AnalyzeEvent(Base):
    __tablename__ = "analyze_events"
@@ -1,9 +1,9 @@
 """documents 테이블 ORM"""

-from datetime import datetime
+from datetime import date, datetime

 from pgvector.sqlalchemy import Vector
-from sqlalchemy import BigInteger, Boolean, DateTime, Enum, ForeignKey, Integer, String, Text
+from sqlalchemy import BigInteger, Boolean, Date, DateTime, Enum, ForeignKey, Integer, String, Text
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import Mapped, mapped_column

@@ -118,7 +118,7 @@ class Document(Base):
    source_channel: Mapped[str | None] = mapped_column(
        Enum("law_monitor", "devonagent", "email", "web_clip",
             "tksafety", "inbox_route", "manual", "drive_sync", "news", "memo",
-             "voice", "hermes",
+             "voice", "hermes", "crawl",
             name="source_channel")
    )
    # 외부 채널 (Hermes Discord 등) 의 channel/user/message_id/timestamp 메타.
@@ -146,6 +146,16 @@ class Document(Base):
    # /accept-suggestion 승인 시에만 category / user_tags 반영 (자동 전이 금지)
    ai_suggestion: Mapped[dict | None] = mapped_column(JSONB)

+    # === 안전 자료실 분류 축 (plan safety-library-1, migrations 340~345) ===
+    # 자료유형 — law/paper/book/incident/manual/standard/guide (TEXT+CHECK, enum 아님).
+    # 수집기 ingest 시점 deterministic 부여 (classify-skip 경로 다수 — classify_worker 의존 금지).
+    # AI 라우팅(subject_domain) 매칭 키 사용 금지 (axis separation — category 와 동일 불변식).
+    material_type: Mapped[str | None] = mapped_column(Text)
+    # 관할 — KR/US/EU/JP/GB/INT. law 는 CHECK 로 jurisdiction NOT NULL 구조 강제 (migration 344).
+    jurisdiction: Mapped[str | None] = mapped_column(Text)
+    # 유형별 대표 날짜 — 법령=COALESCE(시행일, 공포일) / 논문=발행일 / 재해=발생일
+    published_date: Mapped[date | None] = mapped_column(Date)
+
    # PR-B B-1: summary_triage (4B, 상시) / summary_deep (26B, 에스컬레이션) 분할 산출
    ai_tldr: Mapped[str | None] = mapped_column(Text)                          # ≤60자 TL;DR
    ai_bullets: Mapped[list | None] = mapped_column(JSONB)                     # 3~5개 핵심 bullets
@@ -0,0 +1,73 @@
+"""legal_acts / legal_meta 테이블 ORM — 법령 레지스트리(워치리스트 겸) + 버전 위성
+
+plan: safety-library-1 (migrations 346~347).
+- legal_acts = 폴링 순회 대상 목록이 곧 테이블 (news_sources 패턴의 법령판).
+  KOSHA GUIDE(비법령)·KGS Code(watch-폴더 단독 트랙)는 비대상.
+- legal_meta = 법령 문서 1버전(또는 별표·해석례 1건)당 1행, documents 1:0..1 위성.
+  version_status 전이는 statute_collector 의 일일 잡이 유일한 코드 지점
+  (전 버전 pending 적재 → 잡이 승격·supersede·repeal 을 한 트랜잭션 처리).
+"""
+
+from datetime import date, datetime
+
+from sqlalchemy import BigInteger, Boolean, Date, DateTime, ForeignKey, Text, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core.database import Base
+
+
+class LegalAct(Base):
+    __tablename__ = "legal_acts"
+
+    # 'kr-law:{법령ID}' / 'us-cfr:29-1910' 형식. KGS 는 시드 비대상 (R3-M5).
+    family_id: Mapped[str] = mapped_column(Text, primary_key=True)
+    # 어댑터 상수 고정값 — 파싱 결과에서 추론 금지 (코어가 적재 직전 assert)
+    jurisdiction: Mapped[str] = mapped_column(Text, nullable=False)
+    # statute(법률) / decree(시행령) / rule(시행규칙·부령) / admin_rule(고시·예규) / code(법정 위임 상세기준)
+    law_level: Mapped[str] = mapped_column(Text, nullable=False)
+    title: Mapped[str] = mapped_column(Text, nullable=False)
+    title_ko: Mapped[str | None] = mapped_column(Text)
+    # 법률 → 시행령 → 시행규칙 계층
+    parent_family_id: Mapped[str | None] = mapped_column(ForeignKey("legal_acts.family_id"))
+    # 법령ID / CFR part / CELEX / e-Gov law_id 등 소스 고유 식별자
+    native_id: Mapped[str] = mapped_column(Text, nullable=False)
+    # 'law.go.kr' / 'ecfr' / 'cellar' / 'egov_v2' / 'leg_gov_uk'
+    source_api: Mapped[str] = mapped_column(Text, nullable=False)
+    # 시드 26개 전부 true — '우선순위'는 정렬일 뿐 watch 제외 아님 (R3-B1)
+    watch: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
+    poll_cycle: Mapped[str] = mapped_column(Text, nullable=False, default="daily")
+    # 변경이력 폴링 워터마크 — 파싱 검증 통과 후에만 영속
+    watermark: Mapped[str | None] = mapped_column(Text)
+    # 어댑터는 폐지 감지 마킹만, repealed 전이는 일일 잡 (R3-M3)
+    repeal_detected_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now, onupdate=datetime.now
+    )
+
+
+class LegalMeta(Base):
+    __tablename__ = "legal_meta"
+    __table_args__ = (
+        # 버전 dedup 구조 강제 — annex 는 version_key='MST|별표N' 합성형 (R3-M4)
+        UniqueConstraint("family_id", "law_doc_kind", "version_key", name="uq_legal_meta_version"),
+    )
+
+    document_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("documents.id", ondelete="CASCADE"), primary_key=True
+    )
+    family_id: Mapped[str] = mapped_column(
+        ForeignKey("legal_acts.family_id"), nullable=False
+    )
+    # primary(본문) / annex(별표·서식) / interpretation(해석례)
+    law_doc_kind: Mapped[str] = mapped_column(Text, nullable=False, default="primary")
+    version_key: Mapped[str] = mapped_column(Text, nullable=False)
+    promulgation_date: Mapped[date | None] = mapped_column(Date)
+    effective_date: Mapped[date | None] = mapped_column(Date)
+    # pending → current → superseded / repealed. 전이는 일일 잡 단일 지점, KST 기준.
+    version_status: Mapped[str] = mapped_column(Text, nullable=False, default="pending")
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now
+    )
@@ -2,7 +2,8 @@

 from datetime import datetime

-from sqlalchemy import Boolean, DateTime, String, Text
+from sqlalchemy import Boolean, DateTime, Enum, Integer, String, Text
+from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import Mapped, mapped_column

 from core.database import Base
@@ -23,3 +24,41 @@ class NewsSource(Base):
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now
    )
+
+    # ── A-3 (plan crawl-24x7-1) 레지스트리 증축 — migration 319 ──
+    # fetch_method: rss / rss+page / sitemap+page / page / api / signal-only
+    fetch_method: Mapped[str] = mapped_column(String(20), default="rss")
+    # fulltext_policy: none(현행) / page(기사 페이지 fetch 후 4-tier 승격) / feed-full(피드 본문이 전문)
+    fulltext_policy: Mapped[str] = mapped_column(String(20), default="none")
+    # NULL=공개, 값=구독 세션 키 (B-3 Playwright 어댑터 슬롯)
+    auth_profile: Mapped[str | None] = mapped_column(String(50))
+    # 소스별 차등 폴링 (NULL=전역 6h 사이클)
+    poll_interval_minutes: Mapped[int | None] = mapped_column(Integer)
+    # 조건부 GET 워터마크 — 서버가 준 값 그대로 저장·재전송 (A-1)
+    etag: Mapped[str | None] = mapped_column(Text)
+    last_modified: Mapped[str | None] = mapped_column(Text)
+    # CDN ETag 회전 대비 콘텐츠 해시 변경감지 병행 (A-1)
+    feed_content_hash: Mapped[str | None] = mapped_column(String(64))
+    # 추출 실패 잦은 소스의 site-specific CSS selector (A-2)
+    selector_override: Mapped[dict | None] = mapped_column(JSONB)
+    # rdf / table-strip / gn-redirect / skip-video 등 파서 특이 케이스 (B-5)
+    parser_quirk: Mapped[str | None] = mapped_column(String(30))
+    # 채널 — 'news'(다이제스트/브리핑 대상) / 'crawl'(도메인 재료, 0-5 (a)) — migration 324.
+    # documents.source_channel 로 전파, crawl 채널은 embed/chunk 30일 게이트 미적용.
+    # documents 와 동일 PG enum 재사용 (Document 모델과 값 목록 동기 유지).
+    source_channel: Mapped[str] = mapped_column(
+        Enum("law_monitor", "devonagent", "email", "web_clip",
+             "tksafety", "inbox_route", "manual", "drive_sync", "news", "memo",
+             "voice", "hermes", "crawl",
+             name="source_channel"),
+        default="news",
+    )
+
+    # ── 안전 자료실 분류 축 (plan safety-library-1 A-2, migrations 352~355) ──
+    # 자료유형 기본값 — documents.material_type 으로 ingest 시점 전파 (NULL=비대상).
+    # jurisdiction 은 별도 컬럼 없이 country 전파, 단 paper 는 코드에서 NULL 강제.
+    material_type: Mapped[str | None] = mapped_column(Text)
+    # extract_meta.license 주입용 — kogl/ogl/public_domain/proprietary/unknown.
+    # 미확정 = 보수적(unknown + redistribute=false), 근거 확보 시 완화.
+    license_scheme: Mapped[str | None] = mapped_column(Text)
+    license_redistribute: Mapped[bool | None] = mapped_column(Boolean)
@@ -2,14 +2,41 @@

 from datetime import datetime

-from sqlalchemy import BigInteger, DateTime, Enum, ForeignKey, SmallInteger, Text, text
+from sqlalchemy import BigInteger, DateTime, Enum, ForeignKey, SmallInteger, Text, func, or_, text
 from sqlalchemy.dialects.postgresql import JSONB, insert as pg_insert
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy.types import TIMESTAMP

 from core.database import Base


+class StageDeferred(Exception):
+    """워커가 '지금은 처리 불가 — 자료 손상 없이 보류' 를 선언하는 신호 (ds-macbook-offload-1).
+
+    맥북(M5 Max) deep 슬롯 경로 전용: 503(upstream_cold/editor_busy/warming) · 연결 실패 ·
+    생성 중 절단(read-timeout, 맥북 sleep) 시 raise. queue_consumer/queue_drain 이 attempts 를
+    소모하지 않고 pending 복귀 + payload.deferred_until 백오프를 기록한다. 결과 쓰기는 호출
+    완주 + 파싱 성공 후에만 일어나므로 어느 시점에 끊겨도 부분 쓰기 0 (sleep-안전 불변식).
+    """
+
+    def __init__(self, reason: str, retry_after_minutes: int = 30):
+        super().__init__(reason)
+        self.retry_after_minutes = retry_after_minutes
+
+
+def not_deferred_condition():
+    """보류 백오프(payload.deferred_until, ISO 문자열) 가 미래인 행을 claim 에서 제외.
+
+    payload 없음 / 키 없음 = 통과. queue_consumer 와 queue_drain 의 claim 이 공유한다.
+    """
+    deferred = ProcessingQueue.payload["deferred_until"].astext
+    return or_(
+        deferred.is_(None),
+        deferred.cast(TIMESTAMP(timezone=True)) <= func.now(),
+    )
+
+
 class ProcessingQueue(Base):
    __tablename__ = "processing_queue"

@@ -18,10 +45,11 @@ class ProcessingQueue(Base):
    stage: Mapped[str] = mapped_column(
        # 'stt' (audio): migration 150 / 'thumbnail' (video): queue_consumer 가 enqueue.
        # 'deep_summary' (PR-B B-1): classify_worker 가 에스컬레이션 시 enqueue.
+        # 'fulltext' (crawl-24x7 A-2): migration 321 — 기사 페이지 fetch 후 본문 승격.
        # DB enum 변경은 마이그레이션이 처리하므로 create_type=False.
        Enum(
            "extract", "classify", "summarize", "embed", "chunk", "preview",
-            "stt", "thumbnail", "deep_summary", "markdown",
+            "stt", "thumbnail", "deep_summary", "markdown", "fulltext",
            name="process_stage",
            create_type=False,
        ),
@@ -0,0 +1,44 @@
+"""source_health 테이블 ORM (A-5, plan crawl-24x7-1)
+
+news_sources 와 1:1. 소스별 fetch 성공/실패 기록 + circuit breaker 상태.
+silent skip 누적 방지의 가시성 기반 — A-8 헬스 패널이 읽는다.
+"""
+
+from datetime import datetime
+
+from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core.database import Base
+
+
+class SourceHealth(Base):
+    __tablename__ = "source_health"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    source_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("news_sources.id", ondelete="CASCADE"), nullable=False
+    )
+    consecutive_failures: Mapped[int] = mapped_column(Integer, default=0)
+    total_fetches: Mapped[int] = mapped_column(BigInteger, default=0)
+    total_failures: Mapped[int] = mapped_column(BigInteger, default=0)
+    last_success_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    last_error: Mapped[str | None] = mapped_column(Text)
+    last_error_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    last_fetch_items: Mapped[int | None] = mapped_column(Integer)
+    # 200 인데 entries 0 인 연속 fetch 횟수 (304/해시동일은 미집계 — 피드 부패 신호 전용)
+    empty_streak: Mapped[int] = mapped_column(Integer, default=0)
+    # closed(정상) / open(연속 실패 → 지수 backoff) / disabled(임계 초과, 수동 복구 대상)
+    circuit_state: Mapped[str] = mapped_column(String(10), default="closed")
+    circuit_opened_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=datetime.now
+    )
+
+    # ── B-3 구독 세션 상태 계약 — migration 325 ──
+    # 쓰기 1종 플래그: A-8 버튼이 기록만, 어댑터가 소비(수동 half-open).
+    # 소비 위치 = open-스킵 분기보다 앞 (r5 함정 고정 — 데드 버튼 방지).
+    relogin_requested: Mapped[bool] = mapped_column(Boolean, default=False)
+    # 내용 기반 probe 결과 (시간 기반 만료 판정 금지 — 페이월 안내문 silent corruption 차단)
+    last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+    last_probe_ok: Mapped[bool | None] = mapped_column(Boolean)
@@ -17,10 +17,17 @@ python-multipart>=0.0.9
 jinja2>=3.1.0
 feedparser>=6.0.0
 pymupdf>=1.24.0
-# Web/Blog ingest (devonagent 트랙) — HTML 본문 정화 4-tier fallback
-trafilatura>=1.12.0
+# Web/Blog ingest (devonagent 트랙) + 뉴스 fulltext 승격 (crawl-24x7 A-2) — 4-tier fallback.
+# trafilatura 는 단일 메인테이너 리스크로 exact pin (A-2 결정).
+trafilatura==2.1.0
 readability-lxml>=0.8.1
 markdownify>=0.13.1
-# office OOXML(docx/xlsx/pptx) → md (plan ds-s1-backend-1 C-1). hwp 는 LibreOffice+markdownify 경로.
+# tier-4 (bs4) 가 직접 import — 전이 의존 가정 제거 (crawl-24x7 A-2)
+beautifulsoup4>=4.12.0
+# office OOXML(docx/xlsx/pptx) → md (plan ds-s1-backend-1 C-1).
 # 정확한 핀은 E-1 markitdown OOXML PoC(devsbx/버전핀 컨텍스트)에서 확정.
 markitdown[docx,xlsx,pptx]>=0.1.0
+# .hwp(HWP5 binary) → md: 순수 Python HWP5 전용 변환기(CLI hwp5html). LibreOffice 번들 libhwplo
+# 필터가 실제 한컴 HWP5 를 못 읽어 전건 실패 → pyhwp 로 교체(2026-06-09). six = pyhwp 의 미선언 런타임 의존성.
+pyhwp>=0.1b15
+six>=1.16.0
@@ -15,11 +15,12 @@ from sqlalchemy import text

 from core.database import async_session
 from core.utils import setup_logger
+from services.search.license_filter import restricted_exclude_sql

 logger = setup_logger("briefing_loader")


-_NEWS_WINDOW_SQL = text("""
+_NEWS_WINDOW_SQL = text(f"""
    SELECT
        d.id,
        d.title,
@@ -41,6 +42,8 @@ _NEWS_WINDOW_SQL = text("""
      AND d.created_at < :window_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (digest 와 동일 공유 술어, 경로 일관성)
+      AND {restricted_exclude_sql("d")}
 """)


@@ -49,7 +52,7 @@ _SOURCE_COUNTRY_SQL = text("""
 """)


-_HISTORICAL_CANDIDATES_SQL = text("""
+_HISTORICAL_CANDIDATES_SQL = text(f"""
    SELECT
        d.id,
        d.title,
@@ -63,6 +66,8 @@ _HISTORICAL_CANDIDATES_SQL = text("""
      AND d.created_at < :hist_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (공유 술어)
+      AND {restricted_exclude_sql("d")}
 """)


@@ -15,11 +15,12 @@ from sqlalchemy import text

 from core.database import async_session
 from core.utils import setup_logger
+from services.search.license_filter import restricted_exclude_sql

 logger = setup_logger("digest_loader")


-_NEWS_WINDOW_SQL = text("""
+_NEWS_WINDOW_SQL = text(f"""
    SELECT
        d.id,
        d.title,
@@ -41,6 +42,9 @@ _NEWS_WINDOW_SQL = text("""
      AND d.created_at < :window_end
      AND d.embedding IS NOT NULL
      AND d.ai_summary IS NOT NULL
+      -- 안전 자료실 B-4: licensed_restricted 발행 차단 (모든 경로 공유 술어 = license_filter).
+      -- news 채널엔 현재 restricted 부재 = 방어적 게이트(미래 유료 news 소스 대비, 경로 누락 방지).
+      AND {restricted_exclude_sql("d")}
 """)


@@ -13,6 +13,7 @@
 from __future__ import annotations
 import re
 import hashlib
+import unicodedata
 from dataclasses import dataclass, field

 STRUCTURE_SPLIT_THRESHOLD = 4000
@@ -27,6 +28,17 @@ _KO_JEOL = re.compile(r'^\s*(?P<title>제\s*\d+\s*절\b.*)$')
 _KO_JO = re.compile(r'^\s*(?P<title>제\s*\d+\s*조\b.*)$')
 _ENG = re.compile(r'^\s*(?P<title>(?:Chapter|Section|Article|Part|PART)\s+[\dIVXLA-Z]+\b.*)$')

+# 코드펜스 경계 (FE outlineAnchors.ts:60 `/^\s{0,3}(```|~~~)/` 와 동일). 펜스 내부 라인은
+# heading 미탐지 — 코드블록 안 '# foo' 가 가짜 절을 만들지 않게(O3).
+_FENCE = re.compile(r'^\s{0,3}(```|~~~)')
+
+
+def _utf16_units(s: str) -> int:
+    """JS 문자열 .length(= UTF-16 code unit 수) 와 동일. astral(BMP 밖)=surrogate pair=2 units.
+    FE 의 `raw.length` / `out.slice(off)` 가 UTF-16 code unit 단위라 char_start 도 같은 단위여야 함.
+    len(s.encode('utf-16-le'))//2 = code unit 수 (utf-16-le 는 BOM 미부착)."""
+    return len(s.encode("utf-16-le")) // 2
+

@dataclass
 class HierNode:
@@ -39,6 +51,9 @@ class HierNode:
    text: str
    is_leaf: bool = True
    chunk_content_hash: str = field(default="")
+    # md_content 내 heading 라인 시작 offset(UTF-16 code unit). jump-target(비-window leaf / %_split parent)만
+    # 값 보유; window-child / preamble(title None) = None(점프 타깃 아님, g0-t2/g2-t3).
+    char_start: int | None = None

    def finalize_hash(self):
        self.chunk_content_hash = hashlib.sha256(self.text.encode("utf-8")).hexdigest()
@@ -57,33 +72,64 @@ def _detect_heading(line: str) -> tuple[int, str, str] | None:
    return None


-def _segment(text: str) -> list[tuple[int, str | None, str | None, str]]:
-    """heading 경계로 분할 → [(level, title, node_type, segment_text), ...].
+def _segment(text: str) -> list[tuple[int, str | None, str | None, str, int | None]]:
+    r"""heading 경계로 분할 → [(level, title, node_type, segment_text, char_start), ...].

-    preamble(첫 heading 이전 본문) = (0, None, None, text).
+    라인 모델 = FE outlineAnchors.ts:55-65 와 동일: `text.split('\n')` + UTF-16 code-unit offset +
+    코드펜스 추적(splitlines(keepends=True) 폐기 — JS 와 라인경계 \v\f\x1c… 7종을 다르게 쪼개는 문제 제거).
+    char_start = 그 segment 첫 라인(=heading 라인)의 UTF-16 offset. preamble = None(점프 타깃 아님).
+    node.text 보존(라인모델 변경에 hash-neutral): 그룹을 '\n'.join 하되 마지막 그룹이 아니면 분리용 '\n'
+      을 그 그룹 끝에 되돌려 붙여(= splitlines(keepends) 가 마지막 라인에 \n 을 남기던 동작) 원문과 동일.
+    CR 미strip(CRLF 면 '\r' 잔류 → FE raw.length 와 동일), NFC 무변환.
    """
-    lines = text.splitlines(keepends=True)
-    segs: list[tuple[int, str | None, str | None, list[str]]] = []
-    cur: tuple[int, str | None, str | None, list[str]] | None = None
-    preamble: list[str] = []
-    for ln in lines:
-        h = _detect_heading(ln.rstrip("\n"))
-        if h:
-            if cur is not None:
-                segs.append(cur)
-            elif preamble and "".join(preamble).strip():
-                segs.append((0, None, None, preamble))
-            cur = (h[0], h[1], h[2], [ln])
+    raw_lines = text.split("\n")
+    n = len(raw_lines)
+    # 라인별 (offset, heading) 선계산 — 펜스 내부/경계 라인은 heading 미탐지.
+    offs: list[int] = []
+    headings: list[tuple[int, str, str | None] | None] = []
+    off = 0
+    in_fence = False
+    for raw in raw_lines:
+        fence_toggle = bool(_FENCE.match(raw))
+        fenced_here = in_fence or fence_toggle
+        offs.append(off)
+        headings.append(None if fenced_here else _detect_heading(raw))
+        if fence_toggle:
+            in_fence = not in_fence
+        off += _utf16_units(raw) + 1  # '\n'
+
+    # 그룹 경계 = 첫 heading 이전(preamble) + 각 heading 라인. (start_idx, meta) 리스트.
+    first_heading = next((i for i in range(n) if headings[i] is not None), None)
+    starts: list[int] = []
+    metas: list[tuple[int, str | None, str | None] | None] = []
+    if first_heading is None:
+        starts.append(0)
+        metas.append(None)  # 전체 = preamble
+    else:
+        if first_heading > 0:
+            starts.append(0)
+            metas.append(None)
+        for i in range(first_heading, n):
+            h = headings[i]
+            if h is not None:
+                starts.append(i)
+                metas.append((h[0], h[1], h[2]))
+
+    segs: list[tuple[int, str | None, str | None, str, int | None]] = []
+    for gi, s_idx in enumerate(starts):
+        e_idx = starts[gi + 1] if gi + 1 < len(starts) else n
+        seg_text = "\n".join(raw_lines[s_idx:e_idx])
+        if e_idx < n:
+            seg_text += "\n"  # 분리용 '\n' 을 앞 그룹에 귀속(splitlines keepends 동치)
+        meta = metas[gi]
+        if meta is None:
+            if not seg_text.strip():  # 빈 preamble 폐기(기존 동작)
+                continue
+            segs.append((0, None, None, seg_text, None))
        else:
-            if cur is None:
-                preamble.append(ln)
-            else:
-                cur[3].append(ln)
-    if cur is not None:
-        segs.append(cur)
-    elif preamble and "".join(preamble).strip():
-        segs.append((0, None, None, preamble))
-    return [(lvl, title, nt, "".join(body)) for (lvl, title, nt, body) in segs]
+            lvl, title, nt = meta
+            segs.append((lvl, title, nt, seg_text, offs[s_idx]))
+    return segs


 def _window_split(body: str, target: int) -> list[str]:
@@ -139,7 +185,7 @@ def build_hier_tree(
            chain.append(title)
        return " > ".join(chain) if chain else None

-    for lvl, title, nt, body in segs:
+    for lvl, title, nt, body, cstart in segs:
        norm = 0 if lvl == 0 else min(level_map[lvl], max_depth)
        # 부모 = 스택에서 norm 보다 작은 가장 가까운 노드
        while stack and stack[-1][0] >= norm:
@@ -147,8 +193,11 @@ def build_hier_tree(
        parent_idx = stack[-1][1] if stack else None
        idx = len(nodes)
        hp = _heading_path(parent_idx, title)
+        # char_start = 생성 시점 할당(window-split 가 n.text 를 heading 라인으로 truncate 하기 전에 박제).
+        # split-parent 가 돼도 이 값(heading 라인 offset)이 windowed section 단일 jump target 으로 보존된다.
        node = HierNode(idx=idx, parent_idx=parent_idx, level=norm, node_type=nt,
-                        section_title=title, heading_path=hp, text=body, is_leaf=True)
+                        section_title=title, heading_path=hp, text=body, is_leaf=True,
+                        char_start=cstart)
        nodes.append(node)
        if norm > 0:
            stack.append((norm, idx))
@@ -178,14 +227,17 @@ def build_hier_tree(
                n.is_leaf = False
                heading_line = (n.text.splitlines() or [""])[0]
                n.text = heading_line  # 중복 저장 회피 (full body 는 window child 가 보유)
-                n.node_type = (n.node_type or "section") + "_split"
+                n.node_type = (n.node_type or "section") + "_split"  # chapter_split/clause_split/section_split
+                # n.char_start 보존 = windowed section 의 단일 jump target(생성시점 heading offset).
                base_level = min(n.level + 1, max_depth)
                for wtext in wins:
                    ci = len(final)
+                    # window child = char_start None(_window_split 가 whitespace buf 를 drop 해
+                    # char-preserving 이 아니므로 합산 offset 이 거짓; 점프 타깃도 아님, B1/#1).
                    final.append(HierNode(
                        idx=ci, parent_idx=n.idx, level=base_level, node_type="window",
                        section_title=n.section_title, heading_path=n.heading_path,
-                        text=wtext, is_leaf=True))
+                        text=wtext, is_leaf=True, char_start=None))
    for n in final:
        n.finalize_hash()
    return final
@@ -209,6 +261,24 @@ def coverage_stats(text: str, nodes: list[HierNode]) -> dict:
            # 일반 네비: 자식 level > 부모 level 만 보장
            if n.level <= nodes[n.parent_idx].level and nodes[n.parent_idx].level > 0:
                bad_level += 1
+    # char_start O5 검증 (UTF-16 슬라이스 == heading 라인) + NFC telemetry (g2-t4).
+    # 검증은 FE 가 실제 쓰는 방식과 동일: md.encode('utf-16-le')[2*cs:2*(cs+nu)].decode('utf-16-le') == first_line
+    # (Python code-point 슬라이스 md[cs:cs+n] 가 아님 — astral 시 어긋남).
+    md_u16 = text.encode("utf-16-le")
+    cs_total = cs_verified = 0
+    for n in nodes:
+        if n.char_start is None:
+            continue
+        cs_total += 1
+        first_line = n.text.split("\n", 1)[0]
+        nu = _utf16_units(first_line)
+        seg = md_u16[2 * n.char_start: 2 * (n.char_start + nu)]
+        try:
+            if seg.decode("utf-16-le") == first_line:
+                cs_verified += 1
+        except UnicodeDecodeError:
+            pass
+    non_nfc = 1 if unicodedata.normalize("NFC", text) != text else 0
    return {
        "nodes": len(nodes), "leaves": len(leaves),
        "coverage_ratio": round(leaf_chars / base, 4) if base else 0,
@@ -217,4 +287,6 @@ def coverage_stats(text: str, nodes: list[HierNode]) -> dict:
        "level_dist": {l: sum(1 for n in nodes if n.level == l) for l in sorted({n.level for n in nodes})},
        "leaf_len_min": min((len(n.text) for n in leaves), default=0),
        "leaf_len_max": max((len(n.text) for n in leaves), default=0),
+        "char_start_total": cs_total, "char_start_verified": cs_verified,
+        "non_nfc": non_nfc,
    }
@@ -58,16 +58,16 @@ async def persist_hier_tree(
            INSERT INTO document_chunks
              (doc_id, chunk_index, chunk_type, section_title, heading_path, domain_category,
               text, embedding, source_type, chunker_version, chunk_content_hash,
-               parent_id, level, node_type, is_leaf, in_corpus)
+               parent_id, level, node_type, is_leaf, in_corpus, char_start)
            VALUES (:d, :ci, :ct, :stt, :hp, :dc, :tx,
               cast(cast(:emb AS text) AS vector),
-               :src, :cv, :hash, :pid, :lvl, :nt, :leaf, false)
+               :src, :cv, :hash, :pid, :lvl, :nt, :leaf, false, :cs)
            RETURNING id"""), {
                "d": doc_id, "ci": base + n.idx, "ct": chunk_type,
                "stt": n.section_title, "hp": n.heading_path, "dc": domain_category,
                "tx": n.text, "emb": emb_str, "src": SOURCE_TYPE, "cv": CHUNKER_VERSION,
                "hash": n.chunk_content_hash, "pid": parent_db, "lvl": n.level,
-                "nt": n.node_type, "leaf": n.is_leaf})
+                "nt": n.node_type, "leaf": n.is_leaf, "cs": n.char_start})
        idx_to_dbid[n.idx] = db_id
    await session.commit()

@@ -0,0 +1,524 @@
+"""처리 머신 보드 + ETA 집계 (plan ds-processing-ui-6an, 안2+안5/6).
+
+GET /api/queue/overview 의 집계 로직. 모든 수치는 기존 processing_queue /
+documents 컬럼에서 라이브 계산 — 신규 테이블/마이그레이션 0 (HARD 제약).
+
+구조: SQL 수집부(build_overview 내부 5쿼리)와 판정부(순수 함수)를 분리.
+판정부(rows_to_* / build_machines / build_summarize_eta / build_trend /
+build_totals / compute_eta_minutes)는 DB 없이 단위테스트 가능.
+
+귀속 규칙 (단일 진실):
+- stage→machine 정적 맵: gpu = extract/embed/chunk/markdown/preview/thumbnail/
+  fulltext/stt · macmini = classify/summarize · macbook = deep_summary
+  (단, settings.ai.deep 부재 시 deep_summary 도 macmini 귀속).
+- summarize 는 풀(pool): pending/processing/failed 는 macmini 귀속이되, 완료
+  실적(done_*)은 documents.ai_model_version 조인으로 분리 — 'qwen-macbook'
+  이면 macbook 실적, 아니면 macmini 실적.
+- deferred_pending(payload.deferred_until 미래)은 macbook 카드 귀속
+  (보류 = 맥북 불가 신호).
+"""
+
+from datetime import datetime, timedelta
+from posixpath import basename
+from zoneinfo import ZoneInfo
+
+from sqlalchemy import bindparam, text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.config import settings
+
+# Asia/Seoul timezone; build_overview uses it for "now" and the KST midnight cutoff.
+KST = ZoneInfo("Asia/Seoul")
+
+# Alias used for internal attribution only — the raw model name must not be
+# exposed in responses; only the machine label is.
+_MACBOOK_MODEL_ALIAS = "qwen-macbook"
+
+# Ingredients of the static stage -> machine map (declaration order = the
+# order in which stages are listed on each machine card).
+_GPU_STAGES = (
+    "extract", "embed", "chunk", "markdown",
+    "preview", "thumbnail", "fulltext", "stt",
+)
+_MACMINI_STAGES = ("classify", "summarize")
+_MACBOOK_STAGES = ("deep_summary",)
+_STAGE_ORDER = _GPU_STAGES + _MACMINI_STAGES + _MACBOOK_STAGES
+
+# Machine keys in card order, and the user-facing label for each key.
+_MACHINE_KEYS = ("gpu", "macmini", "macbook")
+_MACHINE_LABELS = {
+    "gpu": "GPU 서버",
+    "macmini": "맥미니",
+    "macbook": "맥북 M5 Max",
+}
+
+# Maximum number of "current" (in-flight) items shown per machine card.
+_CURRENT_LIMIT = 2
+
+
+def stage_machine_map(deep_enabled: bool) -> dict[str, str]:
+    """stage → machine key 맵. deep 슬롯 부재 시 deep_summary 는 macmini 귀속."""
+    mapping: dict[str, str] = {}
+    for s in _GPU_STAGES:
+        mapping[s] = "gpu"
+    for s in _MACMINI_STAGES:
+        mapping[s] = "macmini"
+    for s in _MACBOOK_STAGES:
+        mapping[s] = "macbook" if deep_enabled else "macmini"
+    return mapping
+
+
+def _zero_stage() -> dict:
+    return {
+        "pending": 0, "processing": 0, "failed": 0,
+        "done_1h": 0, "done_today": 0, "done_15m": 0,
+        "deferred_pending": 0, "created_1h": 0, "oldest_pending_at": None,
+    }
+
+
+def rows_to_stage_stats(rows) -> dict[str, dict]:
+    """stage×status 집계 쿼리 행 → {stage: {pending, ..., created_1h}} 변환."""
+    stats: dict[str, dict] = {}
+    for row in rows:
+        stats[row[0]] = {
+            "pending": int(row[1] or 0),
+            "processing": int(row[2] or 0),
+            "failed": int(row[3] or 0),
+            "done_1h": int(row[4] or 0),
+            "done_today": int(row[5] or 0),
+            "done_15m": int(row[6] or 0),
+            "deferred_pending": int(row[7] or 0),
+            "created_1h": int(row[8] or 0),
+            "oldest_pending_at": row[9] if len(row) > 9 else None,
+        }
+    return stats
+
+
+def rows_to_summarize_split(rows) -> dict[str, dict]:
+    """summarize 완료 실적 분리 쿼리 행 → {"macbook"|"macmini": {done_*}}.
+
+    is_macbook = documents.ai_model_version 이 'qwen-macbook' 인지 (내부 판별 전용).
+    """
+    split = {
+        "macbook": {"done_1h": 0, "done_today": 0, "done_15m": 0},
+        "macmini": {"done_1h": 0, "done_today": 0, "done_15m": 0},
+    }
+    for row in rows:
+        key = "macbook" if row[0] else "macmini"
+        split[key]["done_1h"] += int(row[1] or 0)
+        split[key]["done_today"] += int(row[2] or 0)
+        split[key]["done_15m"] += int(row[3] or 0)
+    return split
+
+
+def display_title(row: dict) -> str:
+    """표시용 제목 — title > original_filename > file_path basename > 문서 id."""
+    if row.get("title"):
+        return row["title"]
+    if row.get("original_filename"):
+        return row["original_filename"]
+    if row.get("file_path"):
+        return basename(row["file_path"].rstrip("/"))
+    return f"문서 #{row['document_id']}"
+
+
+def build_machines(
+    stage_stats: dict[str, dict],
+    summarize_split: dict[str, dict],
+    current_rows: list[dict],
+    *,
+    deep_enabled: bool,
+) -> list[dict]:
+    """머신 카드 3장 (gpu / macmini / macbook) 구성 — 귀속 규칙의 판정부."""
+    smap = stage_machine_map(deep_enabled)
+
+    def g(stage: str, field: str) -> int:
+        return stage_stats.get(stage, {}).get(field, 0)
+
+    # current 귀속: processing 행을 머신별 최대 2건 (summarize processing → macmini)
+    current_by_machine: dict[str, list[dict]] = {k: [] for k in _MACHINE_KEYS}
+    for row in current_rows:
+        machine = smap.get(row["stage"])
+        if machine and len(current_by_machine[machine]) < _CURRENT_LIMIT:
+            current_by_machine[machine].append({
+                "document_id": row["document_id"],
+                "title": display_title(row),
+                "stage": row["stage"],
+            })
+
+    machines = []
+    for key in _MACHINE_KEYS:
+        stages = [s for s in _STAGE_ORDER if smap[s] == key]
+
+        pending = sum(g(s, "pending") for s in stages)
+        processing = sum(g(s, "processing") for s in stages)
+        failed = sum(g(s, "failed") for s in stages)
+
+        # 완료 실적: summarize 는 풀이라 stage 합산에서 제외하고 split 로 귀속
+        done_1h = sum(g(s, "done_1h") for s in stages if s != "summarize")
+        done_today = sum(g(s, "done_today") for s in stages if s != "summarize")
+        done_15m = sum(g(s, "done_15m") for s in stages if s != "summarize")
+        if key in summarize_split:
+            done_1h += summarize_split[key]["done_1h"]
+            done_today += summarize_split[key]["done_today"]
+            done_15m += summarize_split[key]["done_15m"]
+
+        # 보류 백오프 = 맥북 불가 신호 → macbook 카드 귀속 (deep 슬롯 유무 무관)
+        deferred_pending = (
+            g("summarize", "deferred_pending") + g("deep_summary", "deferred_pending")
+            if key == "macbook" else 0
+        )
+
+        # state 판정 — 우선순위: 가동 > 보류 > 대기 (사용자 피드백 2026-06-11).
+        # 일하고 있으면(처리 중 또는 최근 15분 완료) 백오프 잔여가 있어도 "가동" —
+        # 보류 건수는 카드의 deferred_pending 라인이 따로 보여준다. "보류" 칩은
+        # 실제로 일이 멈춰 있고 백오프만 쌓인 상태(sleep/불가 지속)에서만.
+        if processing > 0 or done_15m > 0:
+            state = "active"
+        elif key == "macbook" and deferred_pending > 0:
+            state = "deferred"
+        else:
+            state = "idle"
+
+        machines.append({
+            "key": key,
+            "label": _MACHINE_LABELS[key],
+            "state": state,
+            "stages": stages,
+            "pending": pending,
+            "processing": processing,
+            "failed": failed,
+            "done_1h": done_1h,
+            "done_today": done_today,
+            "deferred_pending": deferred_pending,
+            "current": current_by_machine[key],
+        })
+    return machines
+
+
+def compute_eta_minutes(pending: int, done_1h: int, inflow_1h: int) -> int | None:
+    """ETA(분) = 순소화율 기반. done > inflow 일 때만 산출, 아니면 None (소화 불가)."""
+    if done_1h > inflow_1h:
+        return round(pending / (done_1h - inflow_1h) * 60)
+    return None
+
+
+def build_summarize_eta(stage_stats: dict[str, dict]) -> dict:
+    """summarize 풀 ETA — pending 은 보류(deferred) 포함 총수."""
+    s = stage_stats.get("summarize", _zero_stage())
+    pending = s["pending"]
+    done_rate = s["done_1h"]
+    inflow_rate = s["created_1h"]
+    return {
+        "pending": pending,
+        "done_rate_1h": done_rate,
+        "inflow_rate_1h": inflow_rate,
+        "eta_minutes": compute_eta_minutes(pending, done_rate, inflow_rate),
+    }
+
+
+def build_summarize_by_machine(summarize_split: dict[str, dict]) -> dict:
+    """summarize 머신별 완료 실적 분담 (macmini vs macbook) — 보드 레인의
+    오프로드 가시화용. rows_to_summarize_split 이 이미 만든 값을 응답 형태로
+    투영(done_1h/done_today 만, done_15m 은 내부 state 판정 전용이라 제외)."""
+    def m(key: str) -> dict:
+        s = summarize_split.get(key, {})
+        return {"done_1h": int(s.get("done_1h", 0)), "done_today": int(s.get("done_today", 0))}
+    return {"macmini": m("macmini"), "macbook": m("macbook")}
+
+
+def build_trend(
+    inflow_buckets: dict[str, int],
+    done_buckets: dict[str, int],
+    now_kst: datetime,
+) -> list[dict]:
+    """summarize 24h 추이 — KST 시간 버킷 24개 (오래된 것부터, 빈 버킷 0).
+
+    버킷 key = "YYYY-MM-DD HH:00" (KST). SQL to_char 출력과 동일 포맷.
+    """
+    base = now_kst.replace(minute=0, second=0, microsecond=0)
+    trend = []
+    for i in range(23, -1, -1):
+        bucket = base - timedelta(hours=i)
+        key = bucket.strftime("%Y-%m-%d %H:00")
+        trend.append({
+            "hour": bucket.strftime("%H:00"),
+            "inflow": inflow_buckets.get(key, 0),
+            "done": done_buckets.get(key, 0),
+        })
+    return trend
+
+
+def build_stages(stage_stats: dict[str, dict], now=None) -> list[dict]:
+    """단계별 현황 행 — '단계 상세' 패널용 (2026-06-11 사용자 피드백: 완료가 보여야 한다).
+
+    파이프라인 순서 유지, 미지 stage 는 뒤에. 숨김/강조 판단은 FE 몫 — 여기선 사실만.
+    oldest_pending_age_sec = 가장 오래된 pending 의 경과 초 (pending 없으면 None).
+    """
+    from datetime import datetime, timezone
+    now = now or datetime.now(timezone.utc)
+    extra = [s for s in stage_stats if s not in _STAGE_ORDER]
+    rows = []
+    for stage in [*_STAGE_ORDER, *extra]:
+        st = stage_stats.get(stage) or _zero_stage()
+        oldest = st.get("oldest_pending_at")
+        age = None
+        if oldest is not None:
+            if oldest.tzinfo is None:
+                oldest = oldest.replace(tzinfo=timezone.utc)
+            age = max(0, int((now - oldest).total_seconds()))
+        rows.append({
+            "stage": stage,
+            "pending": st["pending"],
+            "processing": st["processing"],
+            "failed": st["failed"],
+            "done_1h": st["done_1h"],
+            "created_1h": st["created_1h"],
+            "done_today": st["done_today"],
+            "oldest_pending_age_sec": age,
+        })
+    return rows
+
+
+def build_totals(stage_stats: dict[str, dict]) -> dict:
+    """전 stage 합계."""
+    return {
+        "pending": sum(s["pending"] for s in stage_stats.values()),
+        "processing": sum(s["processing"] for s in stage_stats.values()),
+        "failed": sum(s["failed"] for s in stage_stats.values()),
+    }
+
+
+def compose_overview(
+    stage_stats: dict[str, dict],
+    summarize_split: dict[str, dict],
+    inflow_buckets: dict[str, int],
+    done_buckets: dict[str, int],
+    current_rows: list[dict],
+    *,
+    deep_enabled: bool,
+    now_kst: datetime,
+) -> dict:
+    """수집된 통계 → 응답 dict (계약 shape). 순수 함수 — DB 불요."""
+    return {
+        "machines": build_machines(
+            stage_stats, summarize_split, current_rows, deep_enabled=deep_enabled
+        ),
+        "stages": build_stages(stage_stats),
+        "summarize_eta": build_summarize_eta(stage_stats),
+        "summarize_by_machine": build_summarize_by_machine(summarize_split),
+        "trend_24h": build_trend(inflow_buckets, done_buckets, now_kst),
+        "totals": build_totals(stage_stats),
+    }
+
+
+# ─── SQL 수집부 (총 5쿼리) ────────────────────────────────────────────────────
+
+# 1) stage×status 집계 + 시간창 완료/유입 + 보류 (1방)
+_STAGE_STATS_SQL = """
+    SELECT
+        stage,
+        COUNT(*) FILTER (WHERE status = 'pending')                          AS pending,
+        COUNT(*) FILTER (WHERE status = 'processing')                       AS processing,
+        COUNT(*) FILTER (WHERE status = 'failed')                           AS failed,
+        COUNT(*) FILTER (WHERE status = 'completed'
+                           AND completed_at > NOW() - INTERVAL '1 hour')    AS done_1h,
+        COUNT(*) FILTER (WHERE status = 'completed'
+                           AND completed_at > :kst_midnight)                AS done_today,
+        COUNT(*) FILTER (WHERE status = 'completed'
+                           AND completed_at > NOW() - INTERVAL '15 minutes') AS done_15m,
+        COUNT(*) FILTER (WHERE status = 'pending'
+                           AND payload ->> 'deferred_until' IS NOT NULL
+                           AND (payload ->> 'deferred_until')::timestamptz > NOW())
+                                                                            AS deferred_pending,
+        COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '1 hour')      AS created_1h,
+        MIN(created_at) FILTER (WHERE status = 'pending')                    AS oldest_pending_at
+    FROM processing_queue
+    GROUP BY stage
+"""
+
+# 2) summarize 풀 완료 실적 분리 (documents.ai_model_version 조인, 1방)
+#    스캔 하한 = 오늘 0시(KST)와 1h 전 중 더 이른 시각 (자정 직후 1h 창 보전).
+_SUMMARIZE_SPLIT_SQL = """
+    SELECT
+        COALESCE(d.ai_model_version = :macbook_alias, false)                 AS is_macbook,
+        COUNT(*) FILTER (WHERE q.completed_at > NOW() - INTERVAL '1 hour')   AS done_1h,
+        COUNT(*) FILTER (WHERE q.completed_at > :kst_midnight)               AS done_today,
+        COUNT(*) FILTER (WHERE q.completed_at > NOW() - INTERVAL '15 minutes') AS done_15m
+    FROM processing_queue q
+    JOIN documents d ON d.id = q.document_id
+    WHERE q.stage = 'summarize'
+      AND q.status = 'completed'
+      AND q.completed_at > LEAST(:kst_midnight, NOW() - INTERVAL '1 hour')
+    GROUP BY 1
+"""
+
+# 3/4) summarize 24h 추이 — KST 시간 버킷 (inflow/done 각 1방)
+_TREND_INFLOW_SQL = """
+    SELECT to_char(date_trunc('hour', created_at AT TIME ZONE 'Asia/Seoul'),
+                   'YYYY-MM-DD HH24:00')                                     AS bucket,
+           COUNT(*)                                                          AS n
+    FROM processing_queue
+    WHERE stage = 'summarize'
+      AND created_at > NOW() - INTERVAL '24 hours'
+    GROUP BY 1
+"""
+
+_TREND_DONE_SQL = """
+    SELECT to_char(date_trunc('hour', completed_at AT TIME ZONE 'Asia/Seoul'),
+                   'YYYY-MM-DD HH24:00')                                     AS bucket,
+           COUNT(*)                                                          AS n
+    FROM processing_queue
+    WHERE stage = 'summarize'
+      AND status = 'completed'
+      AND completed_at > NOW() - INTERVAL '24 hours'
+    GROUP BY 1
+"""
+
+# 5) processing 행 + 표시용 제목 재료 (1방 — 머신별 2건 슬라이스는 판정부에서)
+_CURRENT_SQL = """
+    SELECT q.stage, q.document_id, d.title, d.original_filename, d.file_path
+    FROM processing_queue q
+    JOIN documents d ON d.id = q.document_id
+    WHERE q.status = 'processing'
+    ORDER BY q.started_at DESC NULLS LAST
+    LIMIT 50
+"""
+
+
+async def build_overview(session: AsyncSession) -> dict:
+    """5쿼리 수집 → compose_overview 판정 → 응답 dict."""
+    now_kst = datetime.now(KST)
+    kst_midnight = now_kst.replace(hour=0, minute=0, second=0, microsecond=0)
+    deep_enabled = settings.ai is not None and settings.ai.deep is not None
+
+    stage_rows = (
+        await session.execute(text(_STAGE_STATS_SQL), {"kst_midnight": kst_midnight})
+    ).all()
+    split_rows = (
+        await session.execute(
+            text(_SUMMARIZE_SPLIT_SQL),
+            {"kst_midnight": kst_midnight, "macbook_alias": _MACBOOK_MODEL_ALIAS},
+        )
+    ).all()
+    inflow_rows = (await session.execute(text(_TREND_INFLOW_SQL))).all()
+    done_rows = (await session.execute(text(_TREND_DONE_SQL))).all()
+    current_result = (await session.execute(text(_CURRENT_SQL))).all()
+
+    current_rows = [
+        {
+            "stage": row[0],
+            "document_id": row[1],
+            "title": row[2],
+            "original_filename": row[3],
+            "file_path": row[4],
+        }
+        for row in current_result
+    ]
+
+    return compose_overview(
+        rows_to_stage_stats(stage_rows),
+        rows_to_summarize_split(split_rows),
+        {row[0]: int(row[1]) for row in inflow_rows},
+        {row[0]: int(row[1]) for row in done_rows},
+        current_rows,
+        deep_enabled=deep_enabled,
+        now_kst=now_kst,
+    )
+
+
+# ─── 실패 처리 (plan ds-board-engines-1) ─────────────────────────────────────
+# 실패 = 자동 재시도(max_attempts=3) 소진 후 영구 정지 상태. 여기 함수들은
+# 사용자 명시 조치 전용 — 자동 호출 경로 없음 (보드 실패 드로어가 유일 호출자).
+
+# 실패 행은 completed_at 이 비어 있을 수 있어(소비자 실패 경로가 미기록)
+# started_at 을 시각 fallback 으로 쓴다.
+_FAILED_LIST_SQL = """
+    SELECT q.id, q.stage, q.document_id, q.attempts, q.max_attempts,
+           q.error_message,
+           COALESCE(q.completed_at, q.started_at)                 AS failed_at,
+           d.title, d.original_filename, d.file_path
+    FROM processing_queue q
+    JOIN documents d ON d.id = q.document_id
+    WHERE q.status = 'failed'
+    ORDER BY q.stage, COALESCE(q.completed_at, q.started_at) DESC NULLS LAST
+    LIMIT 300
+"""
+
+# Retry: failed -> pending. attempts is reset to 0, which grants a fresh
+# round of automatic retries. error_message is kept for audit purposes; it is
+# harmless if it remains on a row that later completes.
+# Rows that would collide with uq_queue_active (partial unique index on
+# (document, stage) for pending/processing rows) — i.e. the same document and
+# stage has already been re-enqueued — are left untouched and only show up in
+# the not-retried count.
+# NOTE(review): the NOT EXISTS guard only looks at rows that are already
+# pending/processing. If :ids contains two *failed* rows for the same
+# (document_id, stage), both appear to pass the guard and both would be set
+# to pending in one statement, which looks like it would violate
+# uq_queue_active — confirm whether such duplicates can exist.
+_RETRY_SQL = """
+    UPDATE processing_queue q
+    SET status = 'pending', attempts = 0,
+        started_at = NULL, completed_at = NULL
+    WHERE q.id IN :ids
+      AND q.status = 'failed'
+      AND NOT EXISTS (
+        SELECT 1 FROM processing_queue p
+        WHERE p.document_id = q.document_id
+          AND p.stage = q.stage
+          AND p.status IN ('pending', 'processing')
+          AND p.id <> q.id
+      )
+    RETURNING q.id
+"""
+
+# 건너뛰기: failed → completed + payload 마킹 (감사 추적).
+# enqueue_next_stage 는 의도적으로 호출하지 않는다 — 실패 문서(빈 텍스트 등)가
+# 하류 단계로 흘러가는 것 방지. 후속 단계가 필요하면 재시도가 정상 경로.
+_SKIP_SQL = """
+    UPDATE processing_queue
+    SET status = 'completed', completed_at = NOW(),
+        payload = COALESCE(payload, '{}'::jsonb)
+                  || jsonb_build_object('skipped_by_user', true,
+                                        'skipped_at', NOW()::text)
+    WHERE id IN :ids AND status = 'failed'
+    RETURNING id
+"""
+
+
+async def fetch_failed_items(session: AsyncSession) -> list[dict]:
+    """영구 실패 행 목록 (문서 제목 포함, 최대 300건)."""
+    rows = (await session.execute(text(_FAILED_LIST_SQL))).all()
+    return [
+        {
+            "id": r[0],
+            "stage": r[1],
+            "document_id": r[2],
+            "attempts": int(r[3] or 0),
+            "max_attempts": int(r[4] or 0),
+            "error_message": r[5],
+            "failed_at": r[6],
+            "title": display_title({
+                "document_id": r[2],
+                "title": r[7],
+                "original_filename": r[8],
+                "file_path": r[9],
+            }),
+        }
+        for r in rows
+    ]
+
+
+async def retry_failed(session: AsyncSession, ids: list[int]) -> dict:
+    """failed → pending 복귀. not_retried = active 충돌 + 이미 failed 아님."""
+    unique_ids = list(set(ids))
+    stmt = text(_RETRY_SQL).bindparams(bindparam("ids", expanding=True))
+    retried = (await session.execute(stmt, {"ids": unique_ids})).all()
+    await session.commit()
+    return {
+        "requested": len(unique_ids),
+        "retried": len(retried),
+        "not_retried": len(unique_ids) - len(retried),
+    }
+
+
+async def skip_failed(session: AsyncSession, ids: list[int]) -> dict:
+    """failed → completed(건너뛰기 마킹). 후속 단계 연쇄 없음."""
+    unique_ids = list(set(ids))
+    stmt = text(_SKIP_SQL).bindparams(bindparam("ids", expanding=True))
+    skipped = (await session.execute(stmt, {"ids": unique_ids})).all()
+    await session.commit()
+    return {
+        "requested": len(unique_ids),
+        "skipped": len(skipped),
+        "not_skipped": len(unique_ids) - len(skipped),
+    }
@@ -72,6 +72,10 @@ class LegacyWeightedSum(FusionStrategy):
                    score=existing.score + r.score * 0.5,
                    snippet=existing.snippet,
                    match_reason=f"{existing.match_reason}+vector",
+                    # C-1: 분류 축 메타 전파 (재구성 시 누락 = D-1 유형 표시 None)
+                    material_type=existing.material_type,
+                    jurisdiction=existing.jurisdiction,
+                    published_date=existing.published_date,
                )
            elif r.score > 0.3:
                merged[r.id] = r
@@ -128,6 +132,10 @@ class RRFOnly(FusionStrategy):
                    score=rrf_score,
                    snippet=base.snippet,
                    match_reason="+".join(reasons),
+                    # C-1: 분류 축 메타 전파 (재구성 시 누락 = D-1 유형 표시 None)
+                    material_type=base.material_type,
+                    jurisdiction=base.jurisdiction,
+                    published_date=base.published_date,
                )
            )
        return merged[:limit]
@@ -0,0 +1,28 @@
+"""안전 자료실 B-4 — licensed_restricted 단일 술어 (a안 U-2①, 모든 경로 공유 정의).
+
+색인은 허용하되 restricted=true(구매 전자책·유료자료)의 verbatim span 이 RAG 증거·발행물
+(검색/ask·digest·morning_briefing·study 풀이)에 들어가는 모든 경로를 구조적으로 차단.
+경로마다 술어를 복붙하지 않고 이 한 정의를 공유 — 가드 누락/드리프트 방지
+([[feedback_structural_integrity_over_path_discipline]]).
+개인 파일 열람(GET /documents/{id}?download)은 a안상 허용 = 미적용.
+
+두 표현(raw SQL / ORM)은 의미 동일: restricted 부재·false·extract_meta NULL = COALESCE 로
+미제외(redistribute=false 여도 restricted 부재면 미제외 — redistribute≠restricted 가 핵심).
+"""
+
+
+def restricted_exclude_sql(alias: str = "") -> str:
+    """raw text() 쿼리용 bare 술어('AND' 미포함). alias='' = 컬럼 직접 참조."""
+    p = (alias + ".") if alias else ""
+    return f"COALESCE({p}extract_meta -> 'license' ->> 'restricted', 'false') <> 'true'"
+
+
+def restricted_exclude_orm():
+    """SQLAlchemy ORM .where() 절 — restricted_exclude_sql 과 동일 의미(JSONB extract_meta)."""
+    from sqlalchemy import func
+
+    from models.document import Document
+
+    return func.coalesce(
+        Document.extract_meta["license"]["restricted"].astext, "false"
+    ) != "true"
@@ -26,8 +26,11 @@ PR-MacBook-RAG-Backend-1 부터 `services.llm.QwenMacBookBackend` 는 별 endpoi
 - **fallback(Claude Sonnet 4 API) 경로는 gate 제외**. PR #20 이후 fallback = Claude API. 단 현재
  구현상 `AIClient._call_chat` 내부에서 primary→fallback 전환이 일어나므로
  fallback도 gate 점유 상태로 실행된다. 허용 가능(fallback 빈도 낮음).
- **MLX concurrency는 `MLX_CONCURRENCY = 1` 고정**. 모델이 바뀌어도 single-
-  inference 특성이 깨지지 않는 한 이 값을 올리지 말 것.
+- ~~**MLX concurrency는 `MLX_CONCURRENCY = 1` 고정**~~ → **2026-06-12 개정**:
+  구 룰의 전제(서버 = single-inference)가 소멸 — 현 mlx_vlm server 는 continuous
+  batching 으로 동시 스트림 흡수(실측). 상한은 config `pipeline.mlx_gate_concurrency`
+  (기본 1, 운영 2). **게이트 자체(상한+우선순위 큐)는 영구 유지** — thundering herd
+  (23 concurrent → 22 timeout 사고) 방지는 계속 이 상한이 담당. 무제한 금지.

 ## 우선순위 정책 (B-1, 2026-05-17)

@@ -80,8 +83,22 @@ from core.utils import setup_logger

 logger = setup_logger("llm_gate")

-# MLX primary는 single-inference → 1
-MLX_CONCURRENCY = 1
+
+def _capacity() -> int:
+    """게이트 동시 실행 상한 — config.yaml `pipeline.mlx_gate_concurrency` (기본 1).
+
+    2026-06-12 일반화: "MLX_CONCURRENCY = 1 고정" 영구 룰의 전제(구 서버 = single-
+    inference, 23 concurrent → 22 timeout 실측)가 소멸 — 현 mlx_vlm server 는
+    continuous batching 으로 동시 스트림을 흡수(2026-06-11 밤 6~8 concurrent 실측
+    정상). 게이트 자체(상한 + 우선순위)는 유지하고 상한만 config 로 — thundering
+    herd 재발 방지는 이 상한이 계속 담당한다. 런타임 매 acquire 시 조회라
+    config 변경 + 프로세스 재기동으로 반영, 테스트는 settings monkeypatch.
+    """
+    from core.config import settings
+    try:
+        return max(1, int(getattr(settings, "mlx_gate_concurrency", 1)))
+    except (TypeError, ValueError):
+        return 1

 # Background waiter wait_ms 가 이 값 초과 시 WARN (starvation 신호, aging mitigation 은 Phase 2)
 STARVATION_WARN_MS = 300_000  # 5 min
@@ -101,7 +118,7 @@ DEFAULT_PRIORITY: Priority = Priority.BACKGROUND
 # Tuple format: (priority: int, seq: int, future: asyncio.Future, enqueue_ts: float)
 _waiters: list[tuple[int, int, asyncio.Future, float]] = []
 _seq = itertools.count()
-_inflight: bool = False
+_inflight_n: int = 0  # 동시 실행 수 (구 bool — capacity 일반화로 카운터)
 _lock: asyncio.Lock | None = None


@@ -143,7 +160,7 @@ async def acquire_mlx_gate(

    ⚠ `asyncio.timeout` 은 반드시 gate 안쪽 (Future await 후) 에 둘 것.
    """
-    global _inflight, _waiters
+    global _inflight_n, _waiters

    lock = _get_lock()
    seq = next(_seq)
@@ -152,9 +169,9 @@ async def acquire_mlx_gate(
    fut: asyncio.Future | None = None

    async with lock:
-        if not _inflight and not _waiters:
+        if _inflight_n < _capacity() and not _waiters:
            # fast path — 즉시 inflight 진입, Future 생성 안 함
-            _inflight = True
+            _inflight_n += 1
        else:
            # 대기열 진입
            fut = asyncio.get_event_loop().create_future()
@@ -194,8 +211,8 @@ async def acquire_mlx_gate(
        async with lock:
            next_fut = _dispatch_next_locked()
            if next_fut is None:
-                _inflight = False
-            # _inflight 는 True 유지 (다음 waiter 가 진입 예정)
+                _inflight_n = max(0, _inflight_n - 1)
+            # next_fut 가 있으면 슬롯 handover — 카운트 유지 (다음 waiter 가 진입 예정)
        logger.debug(
            "mlx_gate release duration_ms=%.0f priority=%s seq=%d",
            duration_ms, priority.name, seq,
@@ -222,13 +239,24 @@ def get_mlx_gate():
    return acquire_mlx_gate(DEFAULT_PRIORITY)


+# ── Read-only status (UI 표시용) ─────────────────────────────────────────────
+
+
+def gate_status() -> dict:
+    """현재 gate 점유 스냅샷 (read-only, lock-free 근사치 — UI 표시용).
+
+    inflight = 동시 실행 수(int). 기존 소비자(eid status)는 bool() 캐스팅이라 호환.
+    """
+    return {"inflight": _inflight_n, "waiters": len(_waiters)}
+
+
 # ── Test helpers (conftest reset) ────────────────────────────────────────────


 def _reset_for_test() -> None:
    """테스트 fixture 가 fresh loop 마다 호출. production code 에서 사용 X."""
-    global _waiters, _inflight, _lock, _seq
+    global _waiters, _inflight_n, _lock, _seq
    _waiters = []
-    _inflight = False
+    _inflight_n = 0
    _lock = None
    _seq = itertools.count()
@@ -0,0 +1,55 @@
+"""안전 자료실 C-1 후속 — 검색 결과 wrapper decoration (version_status + facets).
+
+엔드포인트 wrapper 에서 run_search() 결과에 1회 적용 — 검색 코어(run_search) 무접촉(r3).
+- version_status: 법령 결과(material_type='law')에 legal_meta.version_status
+  (current/superseded/pending/repealed) 부착. legal_meta.document_id 1:0..1 위성 →
+  매핑 없는 law(레거시 등)는 None 유지. law 결과 없으면 query skip.
+- facets: top-K 결과 내 분류 축(material_type/jurisdiction/version_status) 분포 라벨(r2-M4).
+  facets=true 일 때만 계산(미요청 시 None = byte 불변·ranking 무관).
+"""
+
+from __future__ import annotations
+
+from collections import Counter
+from typing import TYPE_CHECKING
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+if TYPE_CHECKING:
+    from api.search import SearchResult
+
+
+async def decorate_version_status(
+    session: AsyncSession, results: list["SearchResult"]
+) -> None:
+    """법령 결과에 legal_meta.version_status 부착 (in-place). law 결과 없으면 query skip."""
+    law_ids = [r.id for r in results if r.material_type == "law" and r.id is not None]
+    if not law_ids:
+        return
+    rows = await session.execute(
+        text(
+            "SELECT document_id, version_status FROM legal_meta "
+            "WHERE document_id = ANY(:ids)"
+        ),
+        {"ids": law_ids},
+    )
+    status_by_id = {row.document_id: row.version_status for row in rows}
+    for r in results:
+        if r.id in status_by_id:
+            r.version_status = status_by_id[r.id]
+
+
+def compute_facets(results: list["SearchResult"]) -> dict[str, dict[str, int]]:
+    """top-K 결과의 분류 축 분포 라벨. None 값은 제외(present 라벨만, 빈 축은 미포함)."""
+    axes = {
+        "material_type": [r.material_type for r in results],
+        "jurisdiction": [r.jurisdiction for r in results],
+        "version_status": [getattr(r, "version_status", None) for r in results],
+    }
+    facets: dict[str, dict[str, int]] = {}
+    for axis, vals in axes.items():
+        counter = Counter(v for v in vals if v is not None)
+        if counter:
+            facets[axis] = dict(counter.most_common())
+    return facets
@@ -24,6 +24,7 @@ import asyncio
 import hashlib
 import re
 import time
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any

 from sqlalchemy import text
@@ -63,8 +64,98 @@ CANDIDATE_BACKEND_MAP: dict[str, dict[str, str] | None] = {
        "chunks_table": "document_chunks_cand_snowflake_l_v2",
        "embed_endpoint": "http://embedding-cand-snowflake-l-v2:80/embed",
    },
+    # ─── Phase 2A (embedding-phase2a-1, 2026-06-12): Qwen3-Embedding 후보 3종 ───
+    # embed_kind="ollama" = /api/embed 호출 + 쿼리측 instruct prefix (비대칭 사용,
+    # G-1 fixture 실측: prefix 가 관련쌍 cos +0.016). 문서측은 backfill 이 plain 으로 적재.
+    # qwen4m = 4B 의 MRL 1024d (dimensions 옵션 — Ollama 가 truncate+재정규화 수행, G-1 실측).
+    "cand_qwen06": {
+        "docs_table": "documents_cand_qwen06",
+        "chunks_table": "document_chunks_cand_qwen06",
+        "embed_endpoint": "http://ollama:11434/api/embed",
+        "embed_kind": "ollama",
+        "embed_model": "qwen3-embedding:0.6b",
+    },
+    "cand_qwen4": {
+        "docs_table": "documents_cand_qwen4",
+        "chunks_table": "document_chunks_cand_qwen4",
+        "embed_endpoint": "http://ollama:11434/api/embed",
+        "embed_kind": "ollama",
+        "embed_model": "qwen3-embedding:4b",
+    },
+    "cand_qwen4m": {
+        "docs_table": "documents_cand_qwen4m",
+        "chunks_table": "document_chunks_cand_qwen4m",
+        "embed_endpoint": "http://ollama:11434/api/embed",
+        "embed_kind": "ollama",
+        "embed_model": "qwen3-embedding:4b",
+        "embed_dimensions": 1024,
+    },
 }

+# G-1 핀 고정 instruct 문자열 (inventory 2026-06-12-c 기록과 동일해야 함 —
+# 문구 변경 = 저장=조회 불변식 위반과 동급. 쿼리 측 전용, 문서 적재는 plain).
+QWEN3_QUERY_INSTRUCT = (
+    "Instruct: Given a web search query, retrieve relevant passages that answer the query"
+    "\nQuery: "
+)
+
+# ─── 안전 자료실 C-1: 분류 축 명시 필터 (3 leg 동등, byte 불변) ───────────────
+# 미지정(active=False) 시 모든 SQL 절이 빈 문자열 → 기존 SQL byte 불변(run_eval 회귀 0).
+# year 는 published_date NULL fallback created_at (freshness 와 동일 COALESCE 사상).
+@dataclass
+class AxisFilter:
+    material_types: list[str] | None = None   # CSV → list, material_type = ANY
+    jurisdiction: str | None = None
+    year_from: int | None = None
+    year_to: int | None = None
+
+    def active(self) -> bool:
+        return bool(self.material_types or self.jurisdiction
+                    or self.year_from is not None or self.year_to is not None)
+
+
+def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
+    """alias 기준 axis 필터 SQL — 미지정 시 '' (byte 불변). 반환 형태 ' AND ...'.
+
+    alias='' 이면 컬럼 직접 참조(단일 테이블 FROM documents 경로). 파라미터는 af_ prefix
+    로 호출측 기존 bind 와 충돌 방지.
+    """
+    if af is None or not af.active():
+        return ""
+    p = (alias + ".") if alias else ""
+    cl: list[str] = []
+    if af.material_types:
+        cl.append(f"{p}material_type = ANY(:af_mt)")
+        params["af_mt"] = af.material_types
+    if af.jurisdiction:
+        cl.append(f"{p}jurisdiction = :af_jur")
+        params["af_jur"] = af.jurisdiction
+    if af.year_from is not None:
+        cl.append(f"COALESCE({p}published_date, {p}created_at::date) >= make_date(:af_yf, 1, 1)")
+        params["af_yf"] = af.year_from
+    if af.year_to is not None:
+        cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)")
+        params["af_yt"] = af.year_to
+    return " AND " + " AND ".join(cl)
+
+
+# ─── 안전 자료실 B-4: licensed_restricted 단일 술어 (a안 U-2① — 항상 적용) ──────
+def _license_sql(alias: str) -> str:
+    """licensed_restricted(extract_meta.license.restricted=true) 문서를 retrieval 에서 제외.
+
+    a안: 색인은 허용하되, 구매 전자책/유료자료의 verbatim span 이 RAG 증거·digest 발행에
+    들어가는 경로를 구조적으로 차단. 이 단일 술어를 모든 retrieval leg + digest loader 가
+    공유 — 경로별 가드 누락 방지([[feedback_structural_integrity_over_path_discipline]]).
+    개인 파일 열람(GET /documents/{id}?download)은 a안상 허용이라 미적용.
+
+    axis 필터(조건부)와 달리 항상 적용. restricted 부재/false = COALESCE 로 미제외 →
+    기존 코퍼스(restricted=true 0건)에서 결과 불변. 반환 ' AND ...' (alias='' = 컬럼 직접).
+    술어 정의 = license_filter.restricted_exclude_sql 공유(digest/briefing/study 풀이와 단일 source).
+    """
+    from services.search.license_filter import restricted_exclude_sql
+    return " AND " + restricted_exclude_sql(alias)
+
+
 # 2단계 gate (R2-B1) — SQL string interpolation 직전 final allowlist.
 _VALID_DOCS_TABLE = re.compile(r"^(documents|documents_cand_[a-z0-9_]+)$")
 # corpus_chunks = document_chunks WHERE in_corpus=true 뷰 (Hier-Decomp-1 c2 choke point).
@@ -137,6 +228,34 @@ async def _embed_query_via_tei(endpoint: str, text_: str) -> list[float] | None:
        return None


+async def _embed_query_via_ollama(cfg: dict, text_: str) -> list[float] | None:
+    """Embed a candidate-corpus query via Ollama /api/embed with the asymmetric instruct prefix.
+
+    Query side only: QWEN3_QUERY_INSTRUCT is prepended to the text (documents are ingested plain,
+    per the original note). A truthy cfg["embed_dimensions"] is forwarded as the "dimensions"
+    request field. No cache is consulted here. Best-effort: any failure is logged, None returned.
+    """
+    if not text_:
+        return None
+    import httpx
+    payload: dict = {"model": cfg["embed_model"], "input": [QWEN3_QUERY_INSTRUCT + text_]}
+    dims = cfg.get("embed_dimensions")
+    if dims:
+        payload["dimensions"] = dims
+    try:
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            resp = await client.post(cfg["embed_endpoint"], json=payload)
+            resp.raise_for_status()
+            vectors = resp.json().get("embeddings")
+        well_formed = isinstance(vectors, list) and vectors and isinstance(vectors[0], list)
+        if not well_formed:
+            raise ValueError("unexpected /api/embed shape")
+        return vectors[0]
+    except Exception as exc:
+        logger.warning("candidate ollama embed failed model=%s err=%r", cfg.get("embed_model"), exc)
+        return None
+
+
 def _query_embed_key(text_: str) -> str:
    return hashlib.sha256(f"{text_}|bge-m3".encode("utf-8")).hexdigest()

@@ -174,7 +293,7 @@ def query_embed_cache_stats() -> dict[str, int]:


 async def search_text(
-    session: AsyncSession, query: str, limit: int
+    session: AsyncSession, query: str, limit: int, *, axis: "AxisFilter | None" = None
 ) -> list["SearchResult"]:
    """FTS + trigram 필드별 가중치 검색 (Phase 1.2-B UNION 분해).

@@ -205,8 +324,12 @@ async def search_text(
    # SQLAlchemy async session 내 두 execute는 같은 connection 사용
    await session.execute(text("SELECT set_limit(0.15)"))

+    _params: dict[str, Any] = {"q": query, "limit": limit}
+    # license(항상) + axis(조건부). license 가 항상 ' AND ...' 이라 WHERE 는 늘 존재.
+    _where = _license_sql("d") + _axis_sql("d", axis, _params)
+
    result = await session.execute(
-        text("""
+        text(f"""
            WITH candidates AS (
                -- title trigram (idx_documents_title_trgm)
                SELECT id FROM documents
@@ -259,13 +382,15 @@ async def search_text(
                       WHEN similarity(coalesce(d.ai_summary, ''), :q) >= 0.3 THEN 'summary'
                       WHEN similarity(coalesce(d.extracted_text, ''), :q) >= 0.3 THEN 'content'
                       ELSE 'fts'
-                   END AS match_reason
+                   END AS match_reason,
+                   d.material_type, d.jurisdiction, d.published_date
            FROM documents d
            JOIN candidates c ON d.id = c.id
+            WHERE{_where[4:]}
            ORDER BY score DESC
            LIMIT :limit
        """),
-        {"q": query, "limit": limit},
+        _params,
    )
    return [SearchResult(**row._mapping) for row in result]

@@ -280,6 +405,7 @@ async def search_vector(
    snapshot_chunk_id_max: int | None = None,
    corpus_variant: str | None = None,
    exact_knn: bool = False,
+    axis: "AxisFilter | None" = None,
 ) -> list["SearchResult"]:
    """Hybrid 벡터 검색 — doc + chunks 동시 retrieval (Phase 1.2-G).

@@ -323,7 +449,10 @@ async def search_vector(
    else:
        docs_table = cfg["docs_table"]
        chunks_table = cfg["chunks_table"]
-        query_embedding = await _embed_query_via_tei(cfg["embed_endpoint"], query)
+        if cfg.get("embed_kind") == "ollama":
+            query_embedding = await _embed_query_via_ollama(cfg, query)
+        else:
+            query_embedding = await _embed_query_via_tei(cfg["embed_endpoint"], query)

    logger.info(
        "[embedding-dispatch] backend=%s docs_table=%s chunks_table=%s snapshot_doc_id_max=%s "
@@ -351,6 +480,7 @@ async def search_vector(
                docs_table=docs_table,
                snapshot_doc_id_max=snapshot_doc_id_max,
                exact_knn=exact_knn,
+                axis=axis,
            )

    async def _chunks_call() -> list["SearchResult"]:
@@ -360,6 +490,7 @@ async def search_vector(
                chunks_table=chunks_table,
                snapshot_chunk_id_max=snapshot_chunk_id_max,
                exact_knn=exact_knn,
+                axis=axis,
            )

    doc_results, chunk_results = await asyncio.gather(_docs_call(), _chunks_call())
@@ -375,6 +506,7 @@ async def _search_vector_docs(
    docs_table: str = "documents",
    snapshot_doc_id_max: int | None = None,
    exact_knn: bool = False,
+    axis: "AxisFilter | None" = None,
 ) -> list["SearchResult"]:
    """documents (또는 documents_cand_<slug>).embedding 직접 검색.

@@ -399,28 +531,34 @@ async def _search_vector_docs(
        if snapshot_doc_id_max is not None:
            snapshot_clause = " AND id <= :snapshot_doc_id_max"
            params["snapshot_doc_id_max"] = snapshot_doc_id_max
+        axis_clause = _axis_sql("", axis, params)   # alias 없음 (단일 FROM documents)
+        license_clause = _license_sql("")            # B-4: restricted 항상 제외
        sql = f"""
            SELECT id, title, ai_domain, ai_summary, file_format,
                   (1 - (embedding <=> cast(:embedding AS vector))) AS score,
                   left(extracted_text, 1200) AS snippet,
                   'vector_doc' AS match_reason,
-                   NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title
+                   NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title,
+                   material_type, jurisdiction, published_date
            FROM documents
-            WHERE embedding IS NOT NULL AND deleted_at IS NULL{snapshot_clause}
+            WHERE embedding IS NOT NULL AND deleted_at IS NULL{snapshot_clause}{axis_clause}{license_clause}
            ORDER BY embedding <=> cast(:embedding AS vector)
            LIMIT :limit
        """
    else:
        # candidate: docs_table 은 (doc_id, embed_input, embed_input_hash, embedding) 만 보유 → JOIN documents
+        axis_clause = _axis_sql("d", axis, params)
+        license_clause = _license_sql("d")           # B-4: restricted 항상 제외
        sql = f"""
            SELECT d.id, d.title, d.ai_domain, d.ai_summary, d.file_format,
                   (1 - (c.embedding <=> cast(:embedding AS vector))) AS score,
                   left(d.extracted_text, 1200) AS snippet,
                   'vector_doc' AS match_reason,
-                   NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title
+                   NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title,
+                   d.material_type, d.jurisdiction, d.published_date
            FROM {docs_table} c
            JOIN documents d ON d.id = c.doc_id
-            WHERE d.deleted_at IS NULL
+            WHERE d.deleted_at IS NULL{axis_clause}{license_clause}
            ORDER BY c.embedding <=> cast(:embedding AS vector)
            LIMIT :limit
        """
@@ -436,6 +574,7 @@ async def _search_vector_chunks(
    chunks_table: str = "document_chunks",
    snapshot_chunk_id_max: int | None = None,
    exact_knn: bool = False,
+    axis: "AxisFilter | None" = None,
 ) -> list["SearchResult"]:
    """document_chunks (또는 document_chunks_cand_<slug>).embedding window partition.

@@ -461,12 +600,25 @@ async def _search_vector_chunks(
        snapshot_clause = " AND c.id <= :snapshot_chunk_id_max"
        params["snapshot_chunk_id_max"] = snapshot_chunk_id_max

+    # C-1: axis 필터는 inner topk 에 JOIN (R6 결정 — outer post-filter 면 ANN top-:inner_k
+    # 후보를 뽑은 뒤 거르므로 좁은 필터(GB 법령 등)에서 후보 붕괴). 미지정 시 JOIN 없음 = byte 불변.
+    if axis and axis.active():
+        chunk_join = " JOIN documents df ON df.id = c.doc_id"
+        chunk_axis = _axis_sql("df", axis, params)
+    else:
+        chunk_join = ""
+        chunk_axis = ""
+
+    # B-4: restricted 제외 — outer 가 documents d 를 항상 JOIN 하므로 post-rank 위치.
+    # restricted 는 소수(구매자료)라 inner topk 후 제외해도 candidate collapse 없음(axis 와 상이).
+    license_clause = _license_sql("d")
+
    sql = f"""
        WITH topk AS (
            SELECT c.id AS chunk_id, c.doc_id, c.chunk_index, c.section_title, c.text,
                   c.embedding <=> cast(:embedding AS vector) AS dist
-            FROM {chunks_table} c
-            WHERE c.embedding IS NOT NULL{snapshot_clause}
+            FROM {chunks_table} c{chunk_join}
+            WHERE c.embedding IS NOT NULL{snapshot_clause}{chunk_axis}
            ORDER BY c.embedding <=> cast(:embedding AS vector)
            LIMIT :inner_k
        ),
@@ -479,10 +631,12 @@ async def _search_vector_chunks(
               d.ai_summary AS ai_summary, d.file_format AS file_format,
               (1 - r.dist) AS score, left(r.text, 1200) AS snippet,
               'vector_chunk' AS match_reason,
-               r.chunk_id AS chunk_id, r.chunk_index AS chunk_index, r.section_title AS section_title
+               r.chunk_id AS chunk_id, r.chunk_index AS chunk_index, r.section_title AS section_title,
+               d.material_type AS material_type, d.jurisdiction AS jurisdiction,
+               d.published_date AS published_date
        FROM ranked r
        JOIN documents d ON d.id = r.doc_id
-        WHERE r.rn <= 2 AND d.deleted_at IS NULL
+        WHERE r.rn <= 2 AND d.deleted_at IS NULL{license_clause}
        ORDER BY r.dist
        LIMIT :limit
    """
@@ -47,6 +47,7 @@ from .rerank_service import (
    rerank_chunks,
 )
 from .retrieval_service import (
+    AxisFilter,
    compress_chunks_to_docs,
    search_text,
    search_vector,
@@ -148,6 +149,7 @@ async def run_search(
    rewrite_backend: str | None = None,
    corpus_variant: str | None = None,
    exact_knn: bool = False,
+    axis: AxisFilter | None = None,
 ) -> PipelineResult:
    """검색 파이프라인 실행.

@@ -275,6 +277,7 @@ async def run_search(
                snapshot_chunk_id_max=snapshot_chunk_id_max,
                corpus_variant=corpus_variant,
                exact_knn=exact_knn,
+                axis=axis,
            )
        timing["vector_ms"] = (time.perf_counter() - t0) * 1000
        if not raw_chunks:
@@ -284,7 +287,7 @@ async def run_search(
        results = vector_results
    else:
        t0 = time.perf_counter()
-        text_results = await search_text(session, q, limit)
+        text_results = await search_text(session, q, limit, axis=axis)
        timing["text_ms"] = (time.perf_counter() - t0) * 1000

        if mode == "hybrid":
@@ -306,6 +309,7 @@ async def run_search(
                    snapshot_chunk_id_max=snapshot_chunk_id_max,
                    corpus_variant=corpus_variant,
                    exact_knn=exact_knn,
+                    axis=axis,
                )
            timing["vector_ms"] = (time.perf_counter() - t1) * 1000

@@ -458,6 +462,10 @@ def _rrf_fuse_variants(
            score=rrf_score,
            snippet=doc.snippet,
            match_reason=f"{doc.match_reason}+multi_query_rrf",
+            # C-1: 분류 축 메타 전파 (SearchResult 재구성 지점 — fusion 2곳과 동기)
+            material_type=doc.material_type,
+            jurisdiction=doc.jurisdiction,
+            published_date=doc.published_date,
        ))
    return fused[:limit]

@@ -47,7 +47,7 @@ logger = setup_logger("synthesis")

 # ─── 상수 (plan 영구 룰) ─────────────────────────────────
 PROMPT_VERSION = "v2"
-LLM_TIMEOUT_MS = 30000  # 2026-05-17 B-3: 15s 시 동시 부하 (Mac mini 26B classifier+evidence+synthesis serialized) 빈발 timeout — classifier (30s) 와 align
+LLM_TIMEOUT_MS = 120000  # 2026-06-11 Qwen3.6-27B-6bit 전환: 프리필 ~112 tok/s·디코드 ~11.7 tok/s 실측 — 30s 면 synthesis(답변 본체) 상시 timeout. synthesis 는 graceful skip 불가(=답변 실패)라 단독 상향, config ask.backend.timeout_read_s=120 와 align
 CACHE_TTL = 3600  # 1h (answer 는 원문 변경에 민감 → query_analyzer 24h 보다 짧게)
 CACHE_MAXSIZE = 300
 MAX_ANSWER_CHARS = 600
@@ -24,6 +24,7 @@ from models.chunk import DocumentChunk
 from models.document import Document
 from models.study_question import StudyQuestion
 from models.study_topic import StudyTopicDocument
+from services.search.license_filter import restricted_exclude_orm

 logger = logging.getLogger(__name__)

@@ -124,11 +125,14 @@ async def _gather_document_evidence(
        return []

    # 매핑된 documents 메타 (제목·요약 표기)
+    # B-4: licensed_restricted 제외 → valid_doc_ids 에서 빠지므로 아래 청크 쿼리(doc_id IN)도
+    # 자동 차단. study 풀이 RAG 도 retrieval/digest 와 동일 단일 술어 공유(a안 U-2①).
    doc_meta_rows = (
        await session.execute(
            select(Document.id, Document.title, Document.ai_summary).where(
                Document.id.in_(doc_ids),
                Document.deleted_at.is_(None),
+                restricted_exclude_orm(),
            )
        )
    ).all()
@@ -0,0 +1,256 @@
+"""C-4 ① API 표준 공지(Important Standards Announcements) 수집 워커 (사이클 3).
+
+RSS 없음. 실측(2026-06-11) 결과 '페이지 diff' 가 아니라 공지별 상세 URL 이 있는
+목록 페이지(10건/페이지, ?page=N&pageSize=10 페이지네이션 ~12+) — 목록 링크 파싱
+→ 신규 상세 페이지만 ingest 가 정확하고 dedup 도 자연스럽다 (rss+page 패턴의 HTML 판).
+510/570/653 개정 공지가 업무 직결 — 표준 본문은 유료라 공지만 수집 (카드 C-4).
+
+스케줄 = monthly (main.py 5일 07:05 KST) — 최근 2페이지 diff (월 1~2건 공지 페이스).
+초기 일괄: docker exec hyungi_document_server-fastapi-1 \
+    python -m workers.api_standards_collector --bulk   # 전 페이지 (~120건, politeness ~30분)
+
+멱등: edit_url(정규화)+file_hash dedup — 재실행 = 신규분만.
+"""
+
+import argparse
+import asyncio
+import hashlib
+import re
+from datetime import datetime, timezone
+
+from sqlalchemy import select
+
+from core.crawl_politeness import (
+    CrawlBlocked,
+    CrawlFetchError,
+    CrawlSkip,
+    fetch_page,
+)
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.fulltext_worker import (
+    _WEB_MIN_BODY_LEN,
+    _extract_body,
+    _raw_html_path,
+    _save_raw_html,
+    _strip_article_footer,
+)
+from workers.news_collector import (
+    _get_or_create_health,
+    _normalize_url,
+    _record_failure,
+    _record_success,
+)
+from workers.static_corpus_ingest import _page_title
+
+logger = setup_logger("api_standards")
+
+_BASE = "https://www.api.org"
+_LISTING_PATH = "/products-and-services/standards/important-standards-announcements"
+_LISTING_URL = f"{_BASE}{_LISTING_PATH}"
+_SOURCE_NAME = "API 표준 공지"  # NewsSource.name lookup key; also reused in file_path / ai_sub_group
+
+_SCHEDULED_PAGES = 2   # monthly diff window (20 items — ample overlap at a pace of 1-2 announcements/month)
+_BULK_MAX_PAGES = 15   # 12 pages measured + headroom; the crawl stops early at the first empty page.
+
+_DETAIL_RE = re.compile(  # captures a detail-page path under the listing path, up to any '"', '?' or '#'
+    r'href="(' + re.escape(_LISTING_PATH) + r'/[^"?#]+)"'
+)
+_DATE_RE = re.compile(  # groups: (English month name, day, 4-digit year); the comma is optional
+    r"(January|February|March|April|May|June|July|August|September|October"
+    r"|November|December)\s+(\d{1,2}),?\s+(\d{4})"
+)
+_MONTHS = {m: i for i, m in enumerate(  # English month name -> month number 1..12
+    ["January", "February", "March", "April", "May", "June", "July",
+     "August", "September", "October", "November", "December"], start=1)}
+
+
+def _parse_listing(html_text: str) -> list[str]:
+    """Collect absolute announcement-detail URLs from a listing page, first occurrence first.
+
+    Every _DETAIL_RE match contributes its captured path appended to _BASE; repeated URLs
+    are dropped while the original order is kept. The pattern's capture excludes '?' and
+    '#', so links carrying a query string (e.g. pagination) are not matched.
+    """
+    # dict.fromkeys keeps insertion order, giving an order-preserving dedup in one pass.
+    candidates = (f"{_BASE}{hit.group(1)}" for hit in _DETAIL_RE.finditer(html_text))
+    return list(dict.fromkeys(candidates))
+
+
+def _parse_pub_date(text: str) -> datetime | None:
+    """Parse the first 'Month DD, YYYY' in *text* as a UTC datetime; None if absent or not a real date."""
+    found = _DATE_RE.search(text)
+    if found is None:
+        return None
+    month_name, day, year = found.groups()
+    try:
+        return datetime(int(year), _MONTHS[month_name], int(day), tzinfo=timezone.utc)
+    except ValueError:
+        return None
+
+
+async def _get_or_create_source(session) -> NewsSource:
+    """Return the NewsSource row named _SOURCE_NAME, inserting (and flushing) it if absent."""
+    lookup = select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
+    found = (await session.execute(lookup)).scalars().first()
+    if found is not None:
+        return found
+    created = NewsSource(
+        name=_SOURCE_NAME, feed_url=_LISTING_URL, feed_type="rss",
+        fetch_method="page", fulltext_policy="none",
+        source_channel="crawl", category="Engineering", language="en", country="US",
+        enabled=False,  # kept out of the 6h news cycle; this worker polls monthly instead
+    )
+    session.add(created)
+    await session.flush()
+    return created
+
+
+async def _ingest_detail(session, source: NewsSource, url: str) -> str:
+    """Ingest one announcement page as a Document. Returns 'ok' (created), 'dup' (already stored) or 'skip' (fetch/extraction failed or body too short)."""
+    normalized_url = _normalize_url(url)
+    ann_hash = hashlib.sha256(f"api-ann|{normalized_url}".encode()).hexdigest()[:32]  # URL-derived key, not a content hash
+    existing = await session.execute(
+        select(Document).where(
+            (Document.file_hash == ann_hash)
+            | (Document.edit_url.in_([normalized_url, url]))
+        ).limit(1)
+    )
+    if existing.scalars().first():
+        return "dup"
+
+    try:
+        html_text, final_url = await fetch_page(url)
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+        logger.warning(f"[api-std] fetch 실패 skip: {url} — {type(e).__name__}: {e}")
+        return "skip"
+
+    body, engine, engine_ver = _extract_body(html_text)
+    if not engine:  # a falsy engine is treated as "extraction failed"
+        logger.warning(f"[api-std] 추출 실패 skip (< {_WEB_MIN_BODY_LEN}자): {url}")
+        return "skip"
+    clean_body = _strip_article_footer(body.replace("\x00", ""))  # drop NUL characters first
+    if len(clean_body) < _WEB_MIN_BODY_LEN:
+        return "skip"
+
+    now = datetime.now(timezone.utc)
+    raw_path = _raw_html_path(source.id, ann_hash, now)
+    raw_saved = True
+    try:
+        _save_raw_html(raw_path, html_text)
+    except OSError as e:  # raw-HTML archival is best-effort; ingestion continues without it
+        raw_saved = False
+        logger.error(f"[api-std] 원본 보존 실패 (ingest 는 진행): {e}")
+
+    pub_dt = _parse_pub_date(clean_body)
+    title = _page_title(html_text, fallback=url.rsplit("/", 1)[-1][:90])
+    title = re.sub(r"\s*\|\s*API\s*$", "", title).strip() or title  # trim a trailing "| API"; keep the original if that empties it
+
+    doc = Document(
+        file_path=f"crawl/{_SOURCE_NAME}/{ann_hash}",
+        file_hash=ann_hash,
+        file_format="article",
+        file_size=0,
+        file_type="note",
+        title=title,
+        extracted_text=f"{title}\n\n{clean_body}",
+        extracted_at=now,
+        extractor_version=f"listing+page@{engine}",
+        md_content=clean_body,
+        md_status="success",
+        md_extraction_engine=engine,
+        md_extraction_engine_version=engine_ver,
+        md_format_version="1.0",
+        md_generated_at=now,
+        md_source_hash=hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest(),
+        md_content_hash=hashlib.sha256(clean_body.encode("utf-8")).hexdigest(),
+        content_origin="extracted",
+        source_channel="crawl",
+        data_origin="external",
+        edit_url=normalized_url,
+        review_status="approved",
+        ai_domain="Engineering",
+        ai_sub_group=_SOURCE_NAME,
+        ai_tags=["Engineering/API 표준 공지"],
+        # Safety library A-2 — a standards *announcement* is tagged standard (not the code text itself — ASME/API bodies are paywalled)
+        material_type="standard",
+        jurisdiction="US",
+        published_date=pub_dt.date() if pub_dt else None,
+        extract_meta={
+            "source_id": source.id,
+            "source_name": _SOURCE_NAME,
+            "published_at": pub_dt.isoformat() if pub_dt else None,
+            "license": {"scheme": "proprietary", "redistribute": False,
+                        "attribution": "American Petroleum Institute"},
+            "fulltext": {
+                "status": "api_announcement",
+                "engine": engine,
+                "final_url": final_url,
+                "raw_html_path": str(raw_path) if raw_saved else None,
+                "body_chars": len(clean_body),
+                "resolved_at": now.isoformat(),
+            },
+        },
+    )
+    doc.file_size = len(doc.extracted_text.encode())  # replaces the placeholder 0 with the stored text's byte length
+    session.add(doc)
+    await session.flush()  # flush before enqueueing so doc.id is available
+    await enqueue_stage(session, doc.id, "summarize")
+    await enqueue_stage(session, doc.id, "embed")
+    await enqueue_stage(session, doc.id, "chunk")
+    logger.info(f"[api-std] ingest {len(clean_body)}자 ({engine}): {title[:60]}")
+    return "ok"
+
+
+async def run(bulk: bool = False) -> None:
+    """Monthly entry point (scheduler); bulk=True is the CLI-only full backfill over all listing pages."""
+    now = datetime.now(timezone.utc)
+    async with async_session() as session:
+        source = await _get_or_create_source(session)
+        await session.commit()
+        source_id = source.id  # keep only the id; later sessions re-load the row
+
+    max_pages = _BULK_MAX_PAGES if bulk else _SCHEDULED_PAGES
+    counts = {"ok": 0, "dup": 0, "skip": 0}  # keyed by _ingest_detail's return value
+    try:
+        for page in range(1, max_pages + 1):
+            listing_url = (
+                _LISTING_URL if page == 1
+                else f"{_LISTING_URL}?page={page}&pageSize=10"
+            )
+            html_text, _ = await fetch_page(listing_url)
+            detail_urls = _parse_listing(html_text)
+            if not detail_urls:
+                break  # empty page = end of the listing (early exit for bulk)
+            for url in detail_urls:
+                async with async_session() as session:  # one session + commit per announcement
+                    src = await session.get(NewsSource, source_id)
+                    status = await _ingest_detail(session, src, url)
+                    await session.commit()
+                counts[status] += 1
+            logger.info(f"[api-std] 목록 p{page}: 누적 {counts}")
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:  # raised by a listing-page fetch
+        logger.error(f"[api-std] 목록 수집 실패: {e}")
+        async with async_session() as session:
+            health = await _get_or_create_health(session, source_id)
+            _record_failure(health, str(e) or repr(e), now)
+            await session.commit()
+        return
+
+    async with async_session() as session:
+        health = await _get_or_create_health(session, source_id)
+        _record_success(health, counts["ok"], False, now)
+        src = await session.get(NewsSource, source_id)
+        src.last_fetched_at = now
+        await session.commit()
+    logger.info(f"[api-std] 완료: {counts}")
+
+
+if __name__ == "__main__":  # CLI entry point: python -m workers.api_standards_collector [--bulk]
+    parser = argparse.ArgumentParser(description="API 표준 공지 수집")
+    parser.add_argument("--bulk", action="store_true", help="전 페이지 일괄 (초기 백필)")
+    args = parser.parse_args()
+    asyncio.run(run(bulk=args.bulk))
@@ -8,6 +8,7 @@
 import asyncio
 from datetime import date

+from core.config import settings
 from core.utils import setup_logger
 from services.briefing.pipeline import run_briefing_pipeline

@@ -22,6 +23,9 @@ async def run(target_date: date | None = None) -> dict | None:
    Args:
        target_date: KST 기준 briefing_date (None = 오늘). API regenerate 가 명시 지정 가능.
    """
+    if "briefing" in settings.pipeline_held_stages:
+        logger.info("[briefing] 보류 (pipeline.held_stages) — 이번 실행 skip")
+        return None
    try:
        result = await asyncio.wait_for(
            run_briefing_pipeline(target_date),
@@ -0,0 +1,185 @@
+"""C-2 잔여 ② CCPS Process Safety Beacon 수집 워커 (사이클 3).
+
+월간 1페이지 PDF + 한국어 번역판 — RAG 청크로 이상적 크기 (카드 C-2).
+aiche.org 는 평문 httpx 를 UA 무관 403 (2026-06-11 실측: Archiver UA·브라우저 UA 모두)
+→ playwright-fetcher 익명 컨텍스트 경유 (B-3 인프라 재사용):
+  목록 페이지 브라우저 fetch → beacon PDF 링크 파싱 → referer 쿠키 승계 다운로드.
+
+알려진 리스크: WAF 가 헤드리스 자체를 차단하면 _CHALLENGE_MARKERS → CrawlBlocked
+→ health 실패 기록 후 종료 (르몽드 B-3 PARK 선례 — 그 경우 대안 = 이메일 구독
+.eml 트랙 결합, [[feedback_antibot_headless_subscription_wall]]).
+
+스케줄 = monthly (main.py 5일 07:20 KST). 월간 1건 페이스라 diff 는 file_path dedup 으로 충분.
+수동: docker exec hyungi_document_server-fastapi-1 python -m workers.ccps_collector
+"""
+
+import asyncio
+import hashlib
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+
+from sqlalchemy import select
+
+from core.config import settings
+from core.crawl_politeness import (
+    CrawlBlocked,
+    CrawlFetchError,
+    CrawlSkip,
+    download_via_browser,
+    fetch_page_via_browser,
+)
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.kosha_collector import _safe_filename
+from workers.news_collector import (
+    _get_or_create_health,
+    _record_failure,
+    _record_success,
+)
+
+logger = setup_logger("ccps_collector")
+
+_BEACON_URL = "https://www.aiche.org/ccps/resources/process-safety-beacon"
+_SOURCE_NAME = "CCPS Process Safety Beacon"
+_MAX_PDFS_PER_RUN = 10  # expected pace is 1-2 PDFs/month (EN/KO) — caps a runaway if the page structure is misread
+
+
+def _beacon_pdf_links(html_text: str, base_url: str) -> list[str]:
+    """Return absolute URLs of beacon PDFs found in anchor tags, in page order.
+
+    Deliberately conservative: a double-quoted *.pdf href (optional query string) is kept
+    only when 'beacon' appears, case-insensitively, in the href or in the anchor text.
+    Relative hrefs are resolved against base_url; entries are deduplicated by URL path,
+    so the first link to a given path wins even if later ones differ in query string.
+    """
+    anchor_re = r'<a\s+[^>]*href="([^"]+\.pdf(?:\?[^"]*)?)"[^>]*>(.*?)</a>'
+    by_path: dict[str, str] = {}  # URL path -> first absolute URL (dicts keep insertion order)
+
+    for anchor in re.finditer(anchor_re, html_text, re.I | re.S):
+        href = anchor.group(1)
+        label = re.sub(r"<[^>]+>", " ", anchor.group(2))  # anchor text with inner tags blanked
+        mentions_beacon = "beacon" in href.lower() or "beacon" in label.lower()
+        if not mentions_beacon:
+            continue
+        resolved = urljoin(base_url, href)
+        by_path.setdefault(urlparse(resolved).path, resolved)
+    return list(by_path.values())
+
+
+def _all_pdf_hrefs(html_text: str) -> list[str]:  # every distinct double-quoted *.pdf href, sorted
+    return sorted(set(re.findall(r'href="([^"]+\.pdf(?:\?[^"]*)?)"', html_text, re.I)))
+
+
+async def _get_or_create_source(session) -> NewsSource:
+    """Return the NewsSource row named _SOURCE_NAME, inserting (and flushing) it if absent."""
+    lookup = select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
+    found = (await session.execute(lookup)).scalars().first()
+    if found is not None:
+        return found
+    created = NewsSource(
+        name=_SOURCE_NAME, feed_url=_BEACON_URL, feed_type="rss",
+        fetch_method="page", fulltext_policy="none",
+        source_channel="crawl", category="Safety", language="en", country="US",
+        enabled=False,  # kept out of the 6h news cycle; this worker polls monthly instead
+    )
+    session.add(created)
+    await session.flush()
+    return created
+
+
+async def _ingest_pdf(session, pdf_url: str) -> bool:
+    """Store one Beacon PDF under the NAS mount, add its Document and enqueue 'extract'. Returns True if new, False if its file_path already exists."""
+    fname = _safe_filename(Path(urlparse(pdf_url).path).name)
+    rel_path = f"crawl_raw/ccps_beacon/{fname}"  # doubles as the dedup key (matched on Document.file_path)
+    existing = await session.execute(
+        select(Document).where(Document.file_path == rel_path).limit(1)
+    )
+    if existing.scalars().first():
+        return False
+
+    content, content_type = await download_via_browser(pdf_url, referer=_BEACON_URL)
+    if "pdf" not in content_type.lower() and not content.startswith(b"%PDF"):  # rejected only when BOTH header and magic bytes say "not a PDF"
+        raise CrawlSkip(f"PDF 아님 (content-type={content_type[:60]}): {pdf_url}")
+
+    dest = Path(settings.nas_mount_path) / rel_path
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_bytes(content)  # the file is written before the DB row is added
+
+    doc = Document(
+        file_path=rel_path,
+        file_hash=hashlib.sha256(content).hexdigest(),
+        file_format="pdf",
+        file_size=len(content),
+        file_type="immutable",
+        title=fname.rsplit(".", 1)[0].replace("_", " ").replace("-", " "),  # file stem with '_' and '-' turned into spaces
+        source_channel="crawl",
+        data_origin="external",
+        import_source="ccps_beacon",
+        edit_url=pdf_url,
+        ai_tags=["Safety/CCPS Beacon"],
+        extract_meta={"ccps": {"kind": "beacon_pdf"}},
+    )
+    session.add(doc)
+    await session.flush()  # flush before enqueueing so doc.id is available
+    await enqueue_stage(session, doc.id, "extract")
+    logger.info(f"[ccps] Beacon ingest: {rel_path} ({len(content)} bytes)")
+    return True
+
+
+async def run() -> None:
+    """Monthly entry point — crawl failures are recorded on the source's health row (per the original note, surfaced by the circuit / A-8 panel)."""
+    now = datetime.now(timezone.utc)
+    async with async_session() as session:
+        source = await _get_or_create_source(session)
+        await session.commit()
+        source_id = source.id  # keep only the id; later sessions re-load the row
+
+    try:
+        html_text, final_url = await fetch_page_via_browser(_BEACON_URL, profile=None)
+        links = _beacon_pdf_links(html_text, final_url)
+        if not links:
+            others = _all_pdf_hrefs(html_text)
+            # Zero filtered links may mean the page structure/naming changed — list the PDFs that were found as a clue for fixing the filter
+            raise CrawlFetchError(
+                f"beacon PDF 0건 (전체 PDF {len(others)}건: {others[:5]})"
+            )
+
+        new_count = 0
+        for pdf_url in links[:_MAX_PDFS_PER_RUN]:
+            async with async_session() as session:  # one session per PDF so a failure only rolls back that item
+                try:
+                    if await _ingest_pdf(session, pdf_url):
+                        new_count += 1
+                    await session.commit()
+                except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+                    await session.rollback()
+                    logger.warning(f"[ccps] PDF 실패 skip ({pdf_url}): {e}")
+        if len(links) > _MAX_PDFS_PER_RUN:
+            logger.warning(
+                f"[ccps] PDF {len(links)}건 중 {_MAX_PDFS_PER_RUN}건만 처리 "
+                f"(월간 1~2건 가정 초과 — 페이지 구조 확인 필요)"
+            )
+
+        async with async_session() as session:
+            health = await _get_or_create_health(session, source_id)
+            _record_success(health, new_count, False, now)
+            src = await session.get(NewsSource, source_id)
+            src.last_fetched_at = now
+            await session.commit()
+        logger.info(f"[ccps] 완료: 신규 {new_count}건 (링크 {len(links)}건)")
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+        # Per the original note: CrawlBlocked signals the WAF blocking headless browsers — repeated occurrences open the circuit (basis for parking the source)
+        logger.error(f"[ccps] 수집 실패: {type(e).__name__}: {e}")
+        async with async_session() as session:
+            health = await _get_or_create_health(session, source_id)
+            _record_failure(health, str(e) or repr(e), now)
+            await session.commit()
+
+
+if __name__ == "__main__":  # manual run: python -m workers.ccps_collector
+    asyncio.run(run())
@@ -311,6 +311,10 @@ async def process(document_id: int, session: AsyncSession) -> None:
    country, source, src_lang = await _lookup_news_source(session, doc)
    if src_lang:
        language = src_lang
+    # 안전 자료실 A-2 — 뉴스 lookup 미해당(crawl/law/업로드) 문서는 jurisdiction 을
+    # chunk.country 미러로 (leg 간 국가 일치. EU/INT 도 이 경로로 첫 유입 — String(10) 수용).
+    if country is None and doc.jurisdiction:
+        country = doc.jurisdiction
    domain_category = "news" if doc.source_channel == "news" else "document"

    # 기존 chunks 삭제 (재처리)
@@ -31,12 +31,18 @@ from pydantic import BaseModel, Field, ValidationError
 from sqlalchemy import text as sql_text
 from sqlalchemy.ext.asyncio import AsyncSession

-from ai.client import AIClient, parse_json_response, strip_thinking
+from ai.client import (
+    AIClient,
+    call_deep_or_defer,
+    is_deferrable_error,
+    parse_json_response,
+    strip_thinking,
+)
 from ai.envelope import EscalationEnvelope
 from core.config import settings
 from core.utils import setup_logger
 from models.document import Document
-from models.queue import enqueue_stage
+from models.queue import StageDeferred, enqueue_stage
 from policy.prompt_render import render_4b, policy_version as compute_policy_version
 from policy.routing import decide_routing
 from services.document_telemetry import record_analyze_event
@@ -56,6 +62,15 @@ FACET_DOCTYPES = {"발주서", "세금계산서", "명세표", "도면", "증명
 # 자료실 자동 분류 제안 대상 (거래 하위)
 LIBRARY_SUGGESTION_DOCTYPES = {"발주서", "세금계산서", "명세표"}

+# 안전 자료실 A-2 — document_type → material_type 결정적 매핑 (제안 전용, 자동 전이 금지).
+# 모호한 doctype(Reference/Report 등)은 매핑하지 않음 — 무리한 전수 분류 시도 금지 (plan 0-1).
+_DOCTYPE_TO_MATERIAL = {
+    "Law_Document": "law",
+    "Academic_Paper": "paper",
+    "Manual": "manual",
+    "Standard": "standard",
+}
+
 # PR-B prompt_version task 이름
 SUMMARY_TRIAGE_TASK = "p3a_short_summary"

@@ -345,13 +360,20 @@ _FRONTMATTER_PRESERVED_KEYS = {
 # ───────────────────────── main process ────────────────────────────────


-async def process(document_id: int, session: AsyncSession) -> None:
+async def process(
+    document_id: int, session: AsyncSession, *, use_deep: bool = False
+) -> None:
    """문서 분류 + 요약 + tier triage.

    1) Legacy: classify() → ai_domain/document_type/ai_tags/ai_confidence/ai_suggestion
    2) Legacy: summarize() → ai_summary
    3) PR-B B-1: summary_triage (4B) → ai_tldr/ai_bullets/ai_analysis_tier='triage'

+    use_deep (2026-06-12 fair-share, queue_drain 전용): triage LLM 호출을 deep 슬롯
+    (맥북, 라우터 경유)으로 보낸다 — sampling 은 triage 의 temperature/max_tokens 를
+    유지(분류 결정성), endpoint 만 교체. 맥북 불가 = StageDeferred 전파(drain 이
+    보류 처리). False(기본/consumer) = 기존 call_triage(맥미니 직접) 그대로.
+
    예외 — source_channel='law_monitor':
      법령은 외부 source-of-truth (law.go.kr) 보유 + immutable + 자동 재수집.
      AI 분류는 무가치 + 본문 해석 환각 위험. 26B legacy + 4B triage 전부 skip.
@@ -446,10 +468,20 @@ async def process(document_id: int, session: AsyncSession) -> None:
        logger.info(f"doc {document_id}: frontmatter 부분 인식 → LLM 으로 미설정 필드 보완")

    client = AIClient()
+    # fair-share (2026-06-12): use_deep 시 legacy classify/summarize 도 deep 슬롯(맥북)
+    # 경유 — 그래야 drain 의 "맥북 분담" 이 실제로 성립 (triage 만 보내면 50K 요약
+    # 프리필이 맥미니에 남는다). deep 슬롯 sampling = primary 와 동일(0.3/0.9/8192).
+    legacy_cfg = settings.ai.deep if (use_deep and settings.ai.deep is not None) else None
    try:
-        # ─── 1. Legacy classify (primary 26B) ───
+        # ─── 1. Legacy classify (primary 또는 deep) ───
        truncated = doc.extracted_text[:MAX_CLASSIFY_TEXT]
-        raw_response = await client.classify(truncated)
+        try:
+            raw_response = await client.classify(truncated, cfg=legacy_cfg)
+        except Exception as exc:
+            if legacy_cfg is not None and is_deferrable_error(exc):
+                # 맥북 불가 — 첫 호출(최저 비용 지점)에서 보류로 전환, doc 쓰기 0
+                raise StageDeferred(f"macbook_unavailable:{type(exc).__name__}") from exc
+            raise
        parsed = parse_json_response(raw_response)

        if not parsed:
@@ -469,6 +501,24 @@ async def process(document_id: int, session: AsyncSession) -> None:
        if not doc.document_type:
            doc.document_type = doc_type if doc_type in DOCUMENT_TYPES else "Note"

+        # ─── Safety library A-2: material_type suggestion (upload path — the LLM never sets it directly) ───
+        # Only the deterministic document_type → material_type mapping is stored, as a suggestion
+        # (no prompt change). The transition happens only on approval (accept-suggestion); for law
+        # the country must be entered explicitly (keeps a KR default from polluting data), the same
+        # no-auto-transition idea as category. Collector-ingested docs already carry material_type
+        # and are not candidates. An existing ai_suggestion (transaction docs) takes precedence.
+        _mt_prop = _DOCTYPE_TO_MATERIAL.get(doc.document_type or "")
+        if _mt_prop and doc.material_type is None and doc.ai_suggestion is None:
+            doc.ai_suggestion = {
+                "proposed_material_type": _mt_prop,
+                "proposed_jurisdiction": None,
+                # doc.ai_confidence is only assigned further down, so reading it here would store
+                # the previous run's value (or None); use this run's clamped confidence instead.
+                "confidence": max(0.0, min(1.0, float(parsed.get("confidence", 0.5)))),
+                "source_updated_at": doc.updated_at.isoformat() if doc.updated_at else None,
+                "reason": "document_type→material_type 결정적 매핑",
+            }
+
        # confidence
        confidence = parsed.get("confidence", 0.5)
        doc.ai_confidence = max(0.0, min(1.0, float(confidence)))
@@ -517,12 +567,17 @@ async def process(document_id: int, session: AsyncSession) -> None:
                "reason": "classify pipeline",
            }

-        # ─── 2. Legacy 요약 (primary 26B) ───
-        summary = await client.summarize(doc.extracted_text[:50000])
+        # ─── 2. Legacy 요약 (primary 또는 deep) ───
+        try:
+            summary = await client.summarize(doc.extracted_text[:50000], cfg=legacy_cfg)
+        except Exception as exc:
+            if legacy_cfg is not None and is_deferrable_error(exc):
+                raise StageDeferred(f"macbook_unavailable:{type(exc).__name__}") from exc
+            raise
        doc.ai_summary = strip_thinking(summary)

-        # ─── 메타데이터 (legacy 완료) ───
-        doc.ai_model_version = settings.ai.primary.model
+        # ─── 메타데이터 (legacy 완료) — 실제 처리 머신 귀속 (drain=qwen-macbook) ───
+        doc.ai_model_version = (legacy_cfg or settings.ai.primary).model
        doc.ai_processed_at = datetime.now(timezone.utc)

        logger.info(
@@ -533,7 +588,9 @@ async def process(document_id: int, session: AsyncSession) -> None:

        # ─── 3. PR-B B-1 — tier triage (4B, 실패는 legacy 결과 보존) ───
        try:
-            await _run_tier_triage(client, doc, session)
+            await _run_tier_triage(client, doc, session, use_deep=use_deep)
+        except StageDeferred:
+            raise  # 보류는 실패가 아님 — drain/consumer 가 attempts 미소모 처리
        except Exception as exc:
            logger.exception(f"[triage] id={document_id} 전체 실패 — legacy 유지: {exc}")

@@ -541,8 +598,10 @@ async def process(document_id: int, session: AsyncSession) -> None:
        await client.close()


-async def _run_tier_triage(client: AIClient, doc: Document, session: AsyncSession) -> None:
-    """summary_triage (p3a_short_summary) 경로."""
+async def _run_tier_triage(
+    client: AIClient, doc: Document, session: AsyncSession, *, use_deep: bool = False
+) -> None:
+    """summary_triage (p3a_short_summary) 경로. use_deep = process() 에서 전달 (drain 전용)."""
    document_id = doc.id
    text = doc.extracted_text or ""
    input_chars = len(text)
@@ -550,6 +609,14 @@ async def _run_tier_triage(client: AIClient, doc: Document, session: AsyncSessio
    triage_start = time.perf_counter()
    parse_error: str | None = None
    triage_out = TriageOutput()
+    # drain 경유 시 triage 도 deep 슬롯(맥북) — sampling 은 triage 것 유지(결정성).
+    deep_triage_cfg = None
+    if use_deep and settings.ai.deep is not None:
+        deep_triage_cfg = settings.ai.deep.model_copy(update={
+            "temperature": settings.ai.triage.temperature,
+            "top_p": settings.ai.triage.top_p,
+            "max_tokens": settings.ai.triage.max_tokens,
+        })

    # 입력이 triage 한도 초과면 호출 생략하고 long_context 로 escalate
    if input_chars > TRIAGE_TEXT_LIMIT:
@@ -590,7 +657,14 @@ async def _run_tier_triage(client: AIClient, doc: Document, session: AsyncSessio
    prompt = rendered.replace("{extracted_text}", text[:TRIAGE_TEXT_LIMIT])

    try:
-        raw_triage = await client.call_triage(prompt)
+        if deep_triage_cfg is not None:
+            # drain 전용 — deep 슬롯 endpoint + triage sampling. 맥북 불가(StageDeferred)
+            # 는 아래 generic except 에 먹히지 않게 먼저 전파.
+            raw_triage = await call_deep_or_defer(client, prompt, cfg=deep_triage_cfg)
+        else:
+            raw_triage = await client.call_triage(prompt)
+    except StageDeferred:
+        raise  # drain 이 attempts 미소모 + 백오프로 처리 (sleep-안전)
    except Exception as exc:
        logger.warning(
            "[triage] 4B 호출 실패 id=%s type=%s repr=%r",
@@ -656,6 +730,7 @@ async def _run_tier_triage(client: AIClient, doc: Document, session: AsyncSessio
        escalation_reason=escalation_reason,
        parse_error=parse_error,
        routing_decision=routing_decision,
+        model_name=(deep_triage_cfg.model if deep_triage_cfg is not None else None),
    )


@@ -670,6 +745,7 @@ async def _apply_triage_result(
    escalation_reason: str | None,
    parse_error: str | None,
    routing_decision=None,
+    model_name: str | None = None,  # fair-share: 실제 호출 경로 모델 (None=triage 기본)
 ) -> None:
    """TriageOutput → Document 필드 + R2 suppression + envelope enqueue + audit.

@@ -760,7 +836,7 @@ async def _apply_triage_result(
        layers_returned=["tldr", "bullets"] if not parse_error else [],
        cached=False,
        latency_ms=latency_ms,
-        model_name=settings.ai.triage.model,
+        model_name=(model_name or settings.ai.triage.model),
        prompt_version=(f"{SUMMARY_TRIAGE_TASK}@{pv}" if pv else SUMMARY_TRIAGE_TASK),
        error_code=parse_error,
        source="document_server",
@@ -0,0 +1,401 @@
+"""C-2 잔여 ① US CSB sitemap diff 수집 워커 (plan crawl-24x7-1, 사이클 3).
+
+RSS 폐지 → sitemap.xml lastmod diff 폴링이 정석 (정부 사이트라 lastmod 양호 —
+2026-06-11 실측 1,307 URL, 조사 보고서 페이지는 루트 슬러그). 페이지 본문(4-tier
+≥200자 게이트) + 보고서 PDF(/assets/, recommendation 상태요약 제외) →
+기존 extract 파이프라인(marker/kordoc) 재사용.
+
+스케줄 = weekly (main.py 월 06:50 KST):
+  워터마크(selector_override.sitemap_watermark — B-3 probe 설정과 같은 JSONB 슬롯)
+  이후 lastmod 만, 오래된 것부터 cap(40페이지/회). 워터마크는 처리분까지만 전진
+  = 잔량 자동 점진 백필 (KOSHA GUIDE cap 패턴). cap 미처리 잔량은 매회 로그
+  (silent cap 금지). diff 건수 > sanity(300) = sitemap 부패/lastmod 남발 의심 가시 경고.
+
+초기 일괄 (cap 해제, politeness 로 수 시간 — docker exec -d, 진행 중 같은 서비스
+재배포 금지 [[feedback_docker_exec_orphan_kill]] 자매 함정):
+  docker exec hyungi_document_server-fastapi-1 \
+      python -m workers.csb_collector --limit 3        # 검증용
+  docker exec -d hyungi_document_server-fastapi-1 \
+      python -m workers.csb_collector --bulk           # 전체
+
+멱등: 페이지 = edit_url(정규화)+file_hash dedup (first-wins — lastmod 갱신 페이지의
+본문 재적재는 안 함, 갱신의 실체인 신규 PDF 는 개별 dedup 으로 적재됨).
+PDF = file_path dedup. 워터마크 경계는 >= 재조회 — 경계 페이지 1회 재fetch 후
+dedup 이 잡는다 (lastmod 실측 distinct 라 누적 재fetch 없음).
+"""
+
+import argparse
+import asyncio
+import hashlib
+import random
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+
+import httpx
+from sqlalchemy import select
+
+from core.config import settings
+from core.crawl_politeness import (
+    CRAWL_UA,
+    CrawlBlocked,
+    CrawlFetchError,
+    CrawlSkip,
+    fetch_page,
+)
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.fulltext_worker import (
+    _WEB_MIN_BODY_LEN,
+    _extract_body,
+    _raw_html_path,
+    _save_raw_html,
+    _strip_article_footer,
+)
+from workers.kosha_collector import _safe_filename
+from workers.news_collector import (
+    FeedError,
+    _get_or_create_health,
+    _normalize_url,
+    _record_failure,
+    _record_success,
+)
+from workers.static_corpus_ingest import _page_title
+
+logger = setup_logger("csb_collector")
+
+_SITEMAP_URL = "https://www.csb.gov/sitemap.xml"
+_SOURCE_NAME = "US CSB 사고조사보고서"
+
+_RUN_PAGE_CAP = 40      # max pages per weekly run — the remainder carries over because the watermark does not advance past it
+_DIFF_SANITY = 300      # a weekly diff above this suggests lastmod churn / a corrupt sitemap (card C-2)
+_MAX_PDF_BYTES = 50 * 1024 * 1024
+_PDF_DELAY = (2.0, 5.0)  # delay range between consecutive same-domain PDF downloads (same as kosha _DOWNLOAD_DELAY)
+
+# Sections with no text-corpus value / administrative pages — matched on the first path segment
+# (investigation reports and news releases are root slugs, so unaffected; /news/ and /investigations/ hold only listing pages).
+_SKIP_FIRST_SEGMENT = {
+    "videos", "photos", "events", "members", "disclaimers", "media-room",
+    "about-the-csb", "about-us", "foia", "news", "investigations",
+    "site-map", "subscribe", "unsubscribe", "optout", "test",
+    "privacy-policy", "vulnerability-disclosure-policy", "en-espanol",
+    "newsletter", "recom-stats", "500.aspx", "documents", "records-details",
+}
+
+
+def _parse_sitemap(xml_text: str) -> list[tuple[str, datetime]]:
+    """Return (url, lastmod) pairs; entries with a missing/unparsable lastmod are dropped (no diff axis)."""
+    import html  # local import: <loc> values are XML-escaped (&amp;) per the sitemap protocol
+    out: list[tuple[str, datetime]] = []
+    for m in re.finditer(r"<url>\s*<loc>([^<]+)</loc>\s*<lastmod>([^<]+)</lastmod>", xml_text):
+        try:
+            # A trailing "Z" is only accepted by fromisoformat() from Python 3.11 on.
+            lastmod = datetime.fromisoformat(m.group(2).strip().replace("Z", "+00:00"))
+        except ValueError:
+            continue
+        if lastmod.tzinfo is None:
+            lastmod = lastmod.replace(tzinfo=timezone.utc)  # naive lastmod is treated as UTC
+        out.append((html.unescape(m.group(1).strip()), lastmod))
+    return out
+
+
+def _should_skip(url: str) -> bool:  # True = URL is excluded from ingestion
+    path = urlparse(url).path.strip("/")
+    if not path:
+        return True  # home page (empty path)
+    return path.split("/", 1)[0].lower() in _SKIP_FIRST_SEGMENT  # only the first path segment is matched
+
+
+def _pdf_links(html_text: str, base_url: str) -> list[str]:
+    """Report PDFs linked from the page — /assets/recommendation/ (mostly status-change summaries) is excluded.
+
+    The cache-buster query (?17346) stays on the download URL; dedup and file naming go by path.
+    """
+    seen: set[str] = set()
+    out: list[str] = []
+    for m in re.finditer(r'href="([^"]+\.pdf(?:\?[^"]*)?)"', html_text, re.I):  # double-quoted hrefs only
+        absolute = urljoin(base_url, m.group(1))
+        path = urlparse(absolute).path
+        if "/assets/recommendation/" in path.lower():
+            continue
+        if (urlparse(absolute).hostname or "").lower() != "www.csb.gov":
+            continue  # PDFs on any other host are ignored
+        if path not in seen:
+            seen.add(path)
+            out.append(absolute)  # first URL seen for a path wins; order of appearance is kept
+    return out
+
+
+async def _download_pdf(url: str, dest: Path) -> int:
+    """Download one PDF to dest and return its byte size; raises FeedError on non-200 or oversize."""
+    await asyncio.sleep(random.uniform(*_PDF_DELAY))  # politeness gap before each download (assumes sequential calls)
+    async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+        resp = await client.get(url, headers={"User-Agent": CRAWL_UA})
+    if resp.status_code != 200:
+        raise FeedError(f"PDF 다운로드 {resp.status_code}: {url}")
+    if len(resp.content) > _MAX_PDF_BYTES:  # the cap is checked on the already-downloaded body
+        raise FeedError(f"PDF 크기 초과 ({len(resp.content)} bytes): {url}")
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_bytes(resp.content)
+    return len(resp.content)
+
+
+async def _get_or_create_source(session) -> NewsSource:  # look up the CSB NewsSource row by name; create it if missing
+    result = await session.execute(
+        select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
+    )
+    source = result.scalars().first()
+    if source is None:
+        source = NewsSource(
+            name=_SOURCE_NAME, feed_url=_SITEMAP_URL, feed_type="rss",
+            fetch_method="sitemap+page", fulltext_policy="none",
+            source_channel="crawl", category="Safety", language="en", country="US",
+            enabled=False,  # not part of the 6h news cycle — this worker polls weekly
+        )
+        session.add(source)
+        await session.flush()  # flush only; committing is left to the caller
+    return source
+
+
+def _watermark(source: NewsSource) -> datetime | None:
+    """Stored sitemap watermark as an aware datetime (naive values are taken as UTC); None if absent/unparsable."""
+    raw = (source.selector_override or {}).get("sitemap_watermark")
+    try:
+        value = datetime.fromisoformat(raw) if raw else None
+    except (TypeError, ValueError):  # non-string or malformed value in the JSONB slot
+        return None
+    return value.replace(tzinfo=timezone.utc) if value is not None and value.tzinfo is None else value
+
+
+def _set_watermark(source: NewsSource, value: datetime) -> None:
+    # Reassign a fresh dict so the JSONB change is detected (same convention as fulltext_worker._set_fulltext_meta)
+    cfg = dict(source.selector_override or {})
+    cfg["sitemap_watermark"] = value.isoformat()
+    source.selector_override = cfg
+
+
+async def _ingest_pdf(session, page_slug: str, pdf_url: str) -> bool:
+    """Ingest one PDF: save under the NAS mount, create a Document, enqueue extract. Returns True if newly added."""
+    fname = _safe_filename(Path(urlparse(pdf_url).path).name)
+    rel_path = f"crawl_raw/csb/{page_slug}/{fname}"
+    existing = await session.execute(
+        select(Document).where(Document.file_path == rel_path).limit(1)
+    )
+    if existing.scalars().first():
+        return False  # dedup by file_path — nothing is downloaded
+
+    dest = Path(settings.nas_mount_path) / rel_path
+    size = await _download_pdf(pdf_url, dest)
+    doc = Document(
+        file_path=rel_path,
+        file_hash=hashlib.sha256(dest.read_bytes()).hexdigest(),
+        file_format="pdf",
+        file_size=size,
+        file_type="immutable",
+        title=fname.rsplit(".", 1)[0].replace("_", " "),
+        source_channel="crawl",
+        data_origin="external",
+        import_source="csb_sitemap",
+        edit_url=pdf_url,
+        ai_tags=["Safety/CSB/보고서"],
+        # Safety library A-2 — set deterministically at ingest. CSB = US federal agency = public domain.
+        material_type="incident",
+        jurisdiction="US",
+        extract_meta={"csb": {"page_slug": page_slug, "kind": "report_pdf"},
+                      "license": {"scheme": "public_domain", "redistribute": True,
+                                  "attribution": "U.S. Chemical Safety Board"}},
+    )
+    session.add(doc)
+    await session.flush()
+    await enqueue_stage(session, doc.id, "extract")
+    logger.info(f"[csb] PDF ingest: {rel_path} ({size} bytes)")
+    return True
+
+
+async def _ingest_url(session, source: NewsSource, url: str, lastmod: datetime) -> dict:
+    """Ingest one changed URL: fetch the page, scan every PDF link (per-PDF dedup), store the body if new.
+
+    The PDF scan runs on every visit, including lastmod-triggered revisits — the substance of
+    an update (e.g. a newly added final report) often arrives as a PDF. Returns page/pdf/skip counts.
+    """
+    counts = {"page": 0, "pdf": 0, "skip": 0}
+    try:
+        html_text, final_url = await fetch_page(url)
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+        logger.warning(f"[csb] fetch 실패 skip: {url} — {type(e).__name__}: {e}")
+        counts["skip"] = 1
+        return counts
+
+    page_slug = _safe_filename(urlparse(url).path.strip("/").split("/")[-1] or "root")
+
+    for pdf_url in _pdf_links(html_text, final_url):
+        try:
+            if await _ingest_pdf(session, page_slug, pdf_url):
+                counts["pdf"] += 1
+        except (FeedError, httpx.HTTPError) as e:  # transport errors skip one PDF, not the whole run
+            logger.warning(f"[csb] PDF 실패 skip ({pdf_url}): {type(e).__name__}: {e}")
+
+    # Page body — first-wins (an already-stored page is never re-ingested)
+    normalized_url = _normalize_url(url)
+    page_hash = hashlib.sha256(f"csb-page|{normalized_url}".encode()).hexdigest()[:32]
+    existing = await session.execute(
+        select(Document).where(
+            (Document.file_hash == page_hash)
+            | (Document.edit_url.in_([normalized_url, url]))
+        ).limit(1)
+    )
+    if existing.scalars().first():
+        return counts
+
+    body, engine, engine_ver = _extract_body(html_text)
+    if not engine:
+        logger.info(f"[csb] 본문 부족 — 페이지 비적재 (PDF 만): {url}")
+        return counts
+    clean_body = _strip_article_footer(body.replace("\x00", ""))
+    if len(clean_body) < _WEB_MIN_BODY_LEN:
+        return counts
+
+    now = datetime.now(timezone.utc)
+    raw_path = _raw_html_path(source.id, page_hash, now)
+    raw_saved = True
+    try:
+        _save_raw_html(raw_path, html_text)
+    except OSError as e:
+        raw_saved = False
+        logger.error(f"[csb] 원본 보존 실패 (ingest 는 진행): {e}")
+
+    title = _page_title(html_text, fallback=page_slug.replace("-", " ")[:90])
+    doc = Document(
+        file_path=f"crawl/{_SOURCE_NAME}/{page_hash}",
+        file_hash=page_hash,
+        file_format="article",
+        file_size=0,
+        file_type="note",
+        title=title,
+        extracted_text=f"{title}\n\n{clean_body}",
+        extracted_at=now,
+        extractor_version=f"sitemap+page@{engine}",
+        md_content=clean_body,
+        md_status="success",
+        md_extraction_engine=engine,
+        md_extraction_engine_version=engine_ver,
+        md_format_version="1.0",
+        md_generated_at=now,
+        md_source_hash=hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest(),
+        md_content_hash=hashlib.sha256(clean_body.encode("utf-8")).hexdigest(),
+        content_origin="extracted",
+        source_channel="crawl",
+        data_origin="external",
+        edit_url=normalized_url,
+        review_status="approved",
+        ai_domain="Safety",
+        ai_sub_group=_SOURCE_NAME,
+        ai_tags=["Safety/CSB"],
+        # Safety library A-2 — set deterministically at ingest (classify-skip path)
+        material_type="incident",
+        jurisdiction="US",
+        published_date=lastmod.date() if lastmod else None,
+        extract_meta={
+            "source_id": source.id,
+            "source_name": _SOURCE_NAME,
+            "published_at": lastmod.isoformat(),
+            "license": {"scheme": "public_domain", "redistribute": True,
+                        "attribution": "U.S. Chemical Safety Board"},
+            "fulltext": {
+                "status": "csb_sitemap",
+                "engine": engine,
+                "final_url": final_url,
+                "raw_html_path": str(raw_path) if raw_saved else None,
+                "body_chars": len(clean_body),
+                "resolved_at": now.isoformat(),
+            },
+        },
+    )
+    doc.file_size = len(doc.extracted_text.encode())
+    session.add(doc)
+    await session.flush()
+    await enqueue_stage(session, doc.id, "summarize")
+    await enqueue_stage(session, doc.id, "embed")
+    await enqueue_stage(session, doc.id, "chunk")
+    counts["page"] = 1
+    logger.info(f"[csb] page ingest {len(clean_body)}자 ({engine}): {title[:60]}")
+    return counts
+
+
+async def run(bulk: bool = False, limit: int = 0) -> None:
+    """Weekly entry point (scheduler) — bulk/limit are CLI-only."""
+    now = datetime.now(timezone.utc)
+    async with async_session() as session:
+        source = await _get_or_create_source(session)
+        await session.commit()
+        source_id = source.id
+        watermark = _watermark(source)
+
+    try:
+        xml_text, _ = await fetch_page(
+            _SITEMAP_URL, content_types=("text/xml", "application/xml", "text/html")
+        )
+        entries = _parse_sitemap(xml_text)
+        if not entries:
+            raise FeedError("sitemap 파싱 0건 — 포맷 변경/부패 의심")
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError, FeedError) as e:
+        logger.error(f"[csb] sitemap 수집 실패: {e}")
+        async with async_session() as session:
+            health = await _get_or_create_health(session, source_id)
+            _record_failure(health, str(e) or repr(e), now)
+            await session.commit()
+        return  # sitemap failure is recorded on the health row; nothing is ingested this run
+
+    changed = sorted(
+        (
+            (url, lastmod) for url, lastmod in entries
+            if not _should_skip(url) and (watermark is None or lastmod >= watermark)  # >= re-includes the boundary entry
+        ),
+        key=lambda pair: pair[1],  # oldest lastmod first
+    )
+    if watermark is not None and len(changed) > _DIFF_SANITY:
+        logger.error(
+            f"[csb] diff {len(changed)}건 > sanity {_DIFF_SANITY} — "
+            f"sitemap lastmod 남발/부패 의심 (cap 처리는 계속, 관찰 필요)"
+        )
+
+    cap = len(changed) if bulk else _RUN_PAGE_CAP
+    if limit:
+        cap = min(cap, limit)
+    todo, deferred = changed[:cap], max(len(changed) - cap, 0)
+    logger.info(
+        f"[csb] sitemap {len(entries)}건 중 변경 {len(changed)}건, 처리 {len(todo)}건"
+        + (f" (잔여 {deferred}건 — 워터마크 미전진으로 자동 이월)" if deferred else "")
+    )
+
+    totals = {"page": 0, "pdf": 0, "skip": 0}
+    for i, (url, lastmod) in enumerate(todo, 1):
+        async with async_session() as session:  # one session + commit per URL
+            src = await session.get(NewsSource, source_id)
+            counts = await _ingest_url(session, src, url, lastmod)
+            _set_watermark(src, lastmod)  # advanced after every URL, including ones _ingest_url skipped
+            await session.commit()
+        for k in totals:
+            totals[k] += counts[k]
+        if i % 10 == 0:
+            logger.info(f"[csb] 진행 {i}/{len(todo)} {totals}")
+
+    async with async_session() as session:
+        health = await _get_or_create_health(session, source_id)
+        _record_success(health, totals["page"] + totals["pdf"], False, now)
+        src = await session.get(NewsSource, source_id)
+        src.last_fetched_at = now
+        await session.commit()
+    logger.info(f"[csb] 완료: {totals} (변경 {len(changed)}건 중 {len(todo)}건 처리)")
+
+
+if __name__ == "__main__":  # CLI entry: --bulk lifts the per-run page cap, --limit bounds the number processed
+    parser = argparse.ArgumentParser(description="CSB sitemap diff 수집")
+    parser.add_argument("--bulk", action="store_true", help="cap 해제 — 초기 일괄")
+    parser.add_argument("--limit", type=int, default=0, help="처리 상한 (검증용)")
+    args = parser.parse_args()
+    asyncio.run(run(bulk=args.bulk, limit=args.limit))
@@ -1,10 +1,10 @@
-"""일일 다이제스트 워커 — PostgreSQL/CalDAV 쿼리 → Markdown + SMTP
+"""일일 다이제스트 워커 — PostgreSQL/CalDAV 쿼리 → Markdown 생성

 v1 scripts/pkm_daily_digest.py에서 포팅.
 DEVONthink/OmniFocus → PostgreSQL/CalDAV 쿼리로 전환.
+SMTP 발송은 2026-06-10 제거 (한 번도 전달 성공한 적 없는 기능 — 폐기 결정).
 """

-import os
 from datetime import datetime, timezone
 from zoneinfo import ZoneInfo
 from pathlib import Path
@@ -13,7 +13,7 @@ from sqlalchemy import func, select, text

 from core.config import settings
 from core.database import async_session
-from core.utils import send_smtp_email, setup_logger
+from core.utils import setup_logger
 from models.document import Document
 from models.queue import ProcessingQueue

@@ -133,16 +133,4 @@ async def run():
        if old.stat().st_mtime < cutoff:
            old.rename(archive_dir / old.name)

-    # ─── SMTP 발송 ───
-    smtp_host = os.getenv("MAILPLUS_HOST", "")
-    smtp_port = int(os.getenv("MAILPLUS_SMTP_PORT", "465"))
-    smtp_user = os.getenv("MAILPLUS_USER", "")
-    smtp_pass = os.getenv("MAILPLUS_PASS", "")
-    if smtp_host and smtp_user:
-        send_smtp_email(
-            smtp_host, smtp_port, smtp_user, smtp_pass,
-            f"PKM 다이제스트 — {date_display}",
-            markdown,
-        )
-
    logger.info(f"다이제스트 생성 완료: {digest_path}")
@@ -20,12 +20,12 @@ from sqlalchemy.ext.asyncio import AsyncSession

 import json
 import re
-from ai.client import AIClient, parse_json_response, strip_thinking
+from ai.client import AIClient, call_deep_or_defer, parse_json_response, strip_thinking
 from ai.envelope import EscalationEnvelope
 from core.config import settings
 from core.utils import setup_logger
 from models.document import Document
-from models.queue import ProcessingQueue
+from models.queue import ProcessingQueue, StageDeferred
 from policy.prompt_render import render_26b, policy_version as compute_policy_version
 from services.document_telemetry import record_analyze_event
 from services.search.llm_gate import Priority, acquire_mlx_gate
@@ -54,8 +54,18 @@ class DeepSummaryOutput(BaseModel):
    confidence: float = 0.5


-async def process(document_id: int, session: AsyncSession) -> None:
-    """deep_summary 큐 pickup → 26B 호출 → 필드 저장."""
+async def process(
+    document_id: int, session: AsyncSession, *, defer_on_deep_unavailable: bool = False
+) -> None:
+    """deep_summary 큐 pickup → LLM 호출 → 필드 저장.
+
+    defer_on_deep_unavailable:
+      False (기본, consumer 경로) = 맥북(deep 슬롯) 우선 시도, 불가 시 즉시
+        맥미니 primary 로 처리. 2026-06-12 fair-share: 양 머신이 동일 모델
+        (Qwen3.6-27B-6bit)이라 폴백 = 품질 강등이 아니라 단순 분배.
+      True (queue_drain 전용) = 맥북 불가를 StageDeferred 로 올려 drain 이
+        보류 후 run 을 멈춘다 (drain = 맥북 분담 전용 레버 시멘틱 유지).
+    """
    doc = await session.get(Document, document_id)
    if not doc:
        raise ValueError(f"deep_summary: document id={document_id} 없음")
@@ -101,17 +111,40 @@ async def process(document_id: int, session: AsyncSession) -> None:
    )

    client = AIClient()
+    # ds-macbook-offload-1: deep 슬롯 구성 시 맥북 M5 Max 경유(라우터). 부재 시 기존 경로 그대로.
+    deep_cfg = client.ai.deep
+    used_cfg = deep_cfg or settings.ai.primary
    latency_ms = 0
    parse_error: str | None = None
    deep_out = DeepSummaryOutput()

    try:
        start = time.perf_counter()
-        async with acquire_mlx_gate(Priority.BACKGROUND):  # 2026-05-17 B-1: classify-escalate worker
-            raw = await client.call_primary(prompt)
+        if deep_cfg is not None:
+            # 맥북 우선 — 맥미니 mlx gate 미점유(별 endpoint). doc 쓰기는 완주+파싱
+            # 후에만 일어나므로 어느 시점에 끊겨도 부분 쓰기 0.
+            try:
+                raw = await call_deep_or_defer(client, prompt)
+            except StageDeferred:
+                if defer_on_deep_unavailable:
+                    raise  # drain 전용 — 맥북 레버 시멘틱 (보류 후 run 종료)
+                # consumer 경로: 동일 모델이라 강등 아님 — 맥미니가 즉시 처리 (2026-06-12)
+                logger.info(
+                    f"[deep] id={document_id} 맥북 불가 → 맥미니 primary 처리 (fair-share)"
+                )
+                used_cfg = settings.ai.primary
+                async with acquire_mlx_gate(Priority.BACKGROUND):
+                    raw = await client.call_primary(prompt)
+        else:
+            async with acquire_mlx_gate(Priority.BACKGROUND):  # 2026-05-17 B-1: classify-escalate worker
+                raw = await client.call_primary(prompt)
        latency_ms = int((time.perf_counter() - start) * 1000)
+    except StageDeferred:
+        # 보류는 실패가 아님 — analyze_event 미기록(가짜 완료 방지), drain 이 백오프 기록.
+        logger.info(f"[deep] id={document_id} 맥북 일시 불가 — 보류 (deferred)")
+        raise
    except Exception as exc:
-        logger.warning(f"[deep] 26B 호출 실패 id={document_id}: {exc}")
+        logger.warning(f"[deep] 호출 실패 id={document_id} model={used_cfg.model}: {exc}")
        parse_error = "call_failed"
        raw = ""
    finally:
@@ -147,12 +180,13 @@ async def process(document_id: int, session: AsyncSession) -> None:
        doc_id=document_id,
        user_id=None,
        mode="summary_deep",
-        text_limit=settings.ai.primary.context_char_limit or 260000,
+        text_limit=used_cfg.context_char_limit or 260000,
        truncated=False,
        layers_returned=["detail_summary", "inconsistencies"] if not parse_error else [],
        cached=False,
        latency_ms=latency_ms,
-        model_name=settings.ai.primary.model,
+        # deep 슬롯 사용 시 실처리 모델(qwen-macbook alias) 기록 — 어느 머신이 처리했는지 추적
+        model_name=used_cfg.model,
        prompt_version=(f"{DEEP_SUMMARY_TASK}@{pv}" if pv else DEEP_SUMMARY_TASK),
        error_code=parse_error,
        source="document_server",
@@ -10,6 +10,7 @@ global_digests / digest_topics 테이블에 저장한다.

 import asyncio

+from core.config import settings
 from core.utils import setup_logger
 from services.digest.pipeline import run_digest_pipeline

@@ -24,6 +25,9 @@ async def run() -> None:
    pipeline 자체는 timeout 으로 감싸지 않음 (per-call timeout 은 summarizer 가 처리).
    여기서는 전체 hard cap 만 강제.
    """
+    if "digest" in settings.pipeline_held_stages:
+        logger.info("[global_digest] 보류 (pipeline.held_stages) — 이번 실행 skip")
+        return
    try:
        result = await asyncio.wait_for(
            run_digest_pipeline(),
@@ -58,6 +58,23 @@ SCAN_TARGETS: list[tuple[str, str | None]] = [
    ("Videos", "video"),
 ]

+# Safety library A-2/B-4 — deterministic (material_type, jurisdiction, license) axis per watch target.
+# Key = last component of the target path. license is injected as-is into extract_meta.license (None = not injected).
+#   restricted=true → retrieval_service._license_sql is expected to exclude the doc from RAG evidence and digests
+#   (option a, U-2①: blocks verbatim spans of purchased material; indexing and personal file viewing stay allowed).
+# Owner decision (2026-06-13): Books/Papers = proprietary + restricted / Manuals = proprietary, restricted=false
+#   (usable for search/RAG) / KGS = statutorily delegated detailed standards → law/KR, KOGL public, not restricted.
+_TARGET_AXIS: dict[str, tuple[str, str | None, dict | None]] = {
+    "KGS_Code": ("law", "KR", {"scheme": "kogl", "redistribute": True,
+                               "restricted": False, "attribution": "한국가스안전공사(KGS)"}),
+    "Books": ("book", None, {"scheme": "proprietary", "redistribute": False,
+                             "restricted": True, "attribution": "구매 도서"}),
+    "Papers_Purchased": ("paper", None, {"scheme": "proprietary", "redistribute": False,
+                                         "restricted": True, "attribution": "구매 논문"}),
+    "Manuals": ("manual", None, {"scheme": "proprietary", "redistribute": False,
+                                 "restricted": False, "attribution": "기술 매뉴얼"}),
+}
+

 def should_skip(path: Path) -> bool:
    if path.name in SKIP_NAMES or path.name.startswith("._"):
@@ -242,6 +259,11 @@ async def watch_inbox():
            if not scan_root.exists():
                continue

+            # 안전 자료실 A-2/B-4 — 타깃 폴더 기반 (material, jurisdiction, license)
+            target_mt, target_jur, target_license = _TARGET_AXIS.get(
+                Path(sub).name, (None, None, None)
+            )
+
            for file_path in scan_root.rglob("*"):
                if not file_path.is_file() or should_skip(file_path):
                    continue
@@ -275,7 +297,14 @@ async def watch_inbox():
                        source_channel="drive_sync",
                        category=category,
                        needs_conversion=needs_conversion,
+                        # 안전 자료실 A-2/B-4 — watch 타깃 매핑 (KGS=law/KR 등, 비대상=NULL)
+                        material_type=target_mt,
+                        jurisdiction=target_jur,
                    )
+                    # B-4 — 타깃 폴더 license 주입(restricted 포함, 비대상=미주입). classify 는
+                    # material_type IS NULL 일 때만 제안 + extract_meta 미기록이라 주입 보존.
+                    if target_license:
+                        doc.extract_meta = {"license": dict(target_license)}
                    session.add(doc)
                    await session.flush()

@@ -291,6 +320,15 @@ async def watch_inbox():
                        existing.category = category
                    if needs_conversion and not getattr(existing, "needs_conversion", False):
                        existing.needs_conversion = True
+                    # B-4 — 축/license 보정(B-4 이전 적재분이 재변경 시): material 미설정 시 주입,
+                    # license 부재 시에만 merge 주입(clobber 회피 — 기존 extract_meta 키 보존).
+                    if existing.material_type is None and target_mt is not None:
+                        existing.material_type = target_mt
+                        existing.jurisdiction = target_jur
+                    if target_license and not (existing.extract_meta or {}).get("license"):
+                        meta = dict(existing.extract_meta or {})
+                        meta["license"] = dict(target_license)
+                        existing.extract_meta = meta

                    if next_stage:
                        await enqueue_stage(session, existing.id, next_stage)
@@ -0,0 +1,320 @@
+"""fulltext 승격 워커 (A-2 + A-7, plan crawl-24x7-1)
+
+news_collector 가 fulltext_policy='page' 소스의 기사에 enqueue 한 'fulltext' stage 를 소비:
+  기사 페이지 politeness fetch (A-4) → 원본 HTML NAS gzip 보존 (A-7)
+  → extract_worker 4-tier 재사용 (tier 2 sibling .md 는 디스크 원본이 없어 비적용)
+  → extracted_text/md_content 승격 → summarize + (30일 게이트) embed/chunk enqueue.
+
+실패 처리 (큐 어휘 = DB enum, 분기만 워커):
+  - 일시 오류 (5xx/timeout)            : raise → 큐 재시도 (max_attempts 3)
+  - 차단/비대상 (403/429/robots/비HTML/추출부족): RSS 요약으로 격하(degrade) 후 완료
+    → summarize/embed/chunk enqueue 보장 (기사 유실 0). 격하 사유는 extract_meta.fulltext 에 기록.
+  - 영구 실패 (3회 소진)                : 야간 reconcile_unresolved() 가 summarize 안전망 enqueue
+    ([[feedback_silent_skip_accumulation]] — 조건부 skip 이 영구 침묵으로 누적되지 않게).
+
+승격 게이트: 전 tier 공통 본문 >= 200자 (devonagent 와 달리 tier 4 도 게이트 적용 —
+페이월/오류 페이지의 nav 찌꺼기를 본문으로 승격하느니 RSS 요약 격하가 낫다).
+"""
+
+import gzip
+import hashlib
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+
+from sqlalchemy import exists, select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import aliased
+
+from core.config import settings
+from core.crawl_politeness import (
+    CrawlBlocked,
+    CrawlFetchError,
+    CrawlSkip,
+    fetch_page,
+    fetch_page_via_browser,
+    probe_session,
+)
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import ProcessingQueue, enqueue_stage
+from workers.extract_worker import (
+    _WEB_MIN_BODY_LEN,
+    _extract_web_with_bs4,
+    _extract_web_with_readability,
+    _extract_web_with_trafilatura,
+)
+
+logger = setup_logger("fulltext_worker")
+
+# First-pass footer cleanup for Korean news articles (A-2) — conservative:
+# only whole lines are removed.
+_FOOTER_PATTERNS = [
+    re.compile(r"^.{0,120}(무단\s*전재|무단\s*복제|재배포\s*금지|저작권자\s*[ⓒ©(]).*$", re.M),
+    re.compile(r"^[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\s*$", re.M),  # line holding only an e-mail address
+    re.compile(r"^\s*\S{2,4}\s*기자\s*$", re.M),               # line holding only a '<name> 기자' byline
+]
+
+
+def _strip_article_footer(body: str) -> str:
+    """Drop footer boilerplate lines from an article body.
+
+    Each pattern in _FOOTER_PATTERNS is applied in order, then runs of three
+    or more newlines are collapsed to a single blank line and the result is
+    stripped of leading/trailing whitespace.
+    """
+    cleaned = body
+    for footer_re in _FOOTER_PATTERNS:
+        cleaned = footer_re.sub("", cleaned)
+    collapsed = re.sub(r"\n{3,}", "\n\n", cleaned)
+    return collapsed.strip()
+
+
+def _extract_body(html_text: str) -> tuple[str, str | None, str | None]:
+    """Run the web extractors in priority order and return the first usable body.
+
+    Returns (body, engine, engine_version). Every tier is subject to the same
+    minimum length (_WEB_MIN_BODY_LEN); when no tier reaches it the result is
+    ("", None, None).
+    """
+    tiers = (
+        ("trafilatura", _extract_web_with_trafilatura),
+        ("readability", _extract_web_with_readability),
+        ("bs4_text", _extract_web_with_bs4),
+    )
+    # Extractors are invoked lazily, one at a time, so later tiers only run
+    # when the earlier ones fell short.
+    for engine_name, extractor in tiers:
+        text, version = extractor(html_text)
+        if text and len(text) >= _WEB_MIN_BODY_LEN:
+            return text, engine_name, version
+    return "", None, None
+
+
+def _raw_html_path(source_id: int | None, file_hash: str, now: datetime) -> Path:
+    """Build the archive path for an article's raw HTML (A-7).
+
+    Layout: <nas_mount_path>/crawl_raw/src_<source_id>/<YYYY-MM>/<file_hash>.html.gz,
+    with "src_unknown" when source_id is None. The directory is keyed by
+    source_id rather than the source name (author's note: avoids the NFC/NFD
+    asymmetry of Korean directory names).
+
+    file_hash is stripped before use (author's note: the DB column is
+    character(64), so 32-character hashes come back space-padded, and the
+    padding would otherwise end up in the NAS file name).
+    """
+    if source_id is None:
+        source_dir = "src_unknown"
+    else:
+        source_dir = f"src_{source_id}"
+    month_dir = now.strftime("%Y-%m")
+    archive_name = f"{file_hash.strip()}.html.gz"
+    return Path(settings.nas_mount_path) / "crawl_raw" / source_dir / month_dir / archive_name
+
+
+def _save_raw_html(path: Path, html_text: str) -> None:
+    """Write html_text to path as gzip-compressed UTF-8, creating parent directories.
+
+    Characters that cannot be encoded are replaced rather than raising.
+    """
+    payload = html_text.encode("utf-8", errors="replace")
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with gzip.open(path, "wb") as gz_file:
+        gz_file.write(payload)
+
+
+async def _enqueue_downstream(session: AsyncSession, doc: Document) -> None:
+    """Enqueue the follow-up stages shared by promotion and degradation.
+
+    summarize is always enqueued. embed/chunk are enqueued unconditionally for
+    documents whose source_channel is "crawl"; for everything else they are
+    enqueued only when the article is at most 30 days old according to
+    extract_meta["published_at"]. A missing or unusable publication date counts
+    as new (days_old = 0).
+    """
+    await enqueue_stage(session, doc.id, "summarize")
+    if doc.source_channel == "crawl":
+        # Domain reference corpus — index everything regardless of publication
+        # date (the 30-day gate applies to news only).
+        await enqueue_stage(session, doc.id, "embed")
+        await enqueue_stage(session, doc.id, "chunk")
+        return
+    published_raw = (doc.extract_meta or {}).get("published_at")
+    days_old = 0
+    if published_raw:
+        try:
+            pub_dt = datetime.fromisoformat(published_raw)
+            if pub_dt.tzinfo is None:
+                # Subtracting a naive datetime from an aware one raises
+                # TypeError; treat an offset-less timestamp as UTC instead.
+                pub_dt = pub_dt.replace(tzinfo=timezone.utc)
+            days_old = (datetime.now(timezone.utc) - pub_dt).days
+        except (ValueError, TypeError):
+            # Unparseable or non-string value = treated as new (same default
+            # as at collection time).
+            days_old = 0
+    if days_old <= 30:
+        await enqueue_stage(session, doc.id, "embed")
+        await enqueue_stage(session, doc.id, "chunk")
+
+
+def _set_fulltext_meta(doc: Document, **fields) -> None:
+    """Merge fields into extract_meta["fulltext"], keeping existing keys.
+
+    A fresh top-level dict is assigned to doc.extract_meta instead of mutating
+    the existing one (author's note: needed for JSONB change detection).
+    """
+    updated = dict(doc.extract_meta or {})
+    merged = dict(updated.get("fulltext", {}))
+    merged.update(fields)
+    updated["fulltext"] = merged
+    doc.extract_meta = updated
+
+
+_PROBE_TTL_SECONDS = 6 * 3600  # probe validity window (6 h) — once expired, re-verified at a batch boundary
+
+
+async def _auth_session_ready(session: AsyncSession, source: NewsSource) -> tuple[bool, str]:
+    """B-3 (2): content-based probe gate plus relogin_requested consumption (manual half-open).
+
+    The flag is consumed before the 'unavailable -> skip' branch, so it is reached
+    on every adapter tick (fixes the r5 dead-button trap).
+    While the probe is in a failed state no auth fetch is made (an automatic retry
+    loop leads straight to account lockout — B-3 (3)).
+    Recovery path = refresh storage_state, then set the relogin_requested flag (manual).
+    Probe settings live in source.selector_override (JSONB):
+    probe_url / min_body_chars / paywall_markers.
+
+    Returns (ready, reason); reason is "" whenever ready is True.
+    """
+    from workers.news_collector import _get_or_create_health
+
+    health = await _get_or_create_health(session, source.id)
+    now = datetime.now(timezone.utc)
+    cfg = source.selector_override or {}
+    probe_url = cfg.get("probe_url")
+
+    force = False
+    if health.relogin_requested:
+        health.relogin_requested = False  # consumed = exactly one half-open attempt
+        health.updated_at = now
+        force = True
+        logger.info(f"[fulltext/auth] {source.name} relogin_requested 소비 — half-open probe")
+
+    if not force:
+        # `is False` on purpose: None (never probed) falls through to a first probe,
+        # only an explicit recorded failure blocks.
+        if health.last_probe_ok is False:
+            return False, "probe 실패 상태 (storage_state 갱신 + relogin_requested 대기)"
+        # A successful probe younger than the TTL is reused without re-probing.
+        if (
+            health.last_probe_ok
+            and health.last_probe_at
+            and (now - health.last_probe_at).total_seconds() < _PROBE_TTL_SECONDS
+        ):
+            return True, ""
+
+    if not probe_url:
+        return False, "selector_override.probe_url 미설정"
+
+    result = await probe_session(
+        source.auth_profile,
+        probe_url,
+        int(cfg.get("min_body_chars", 800)),
+        list(cfg.get("paywall_markers", [])),
+    )
+    # Record the outcome on the health row; persisting it is left to the caller's commit.
+    health.last_probe_at = now
+    health.last_probe_ok = bool(result.get("ok"))
+    health.updated_at = now
+    if not health.last_probe_ok:
+        logger.warning(f"[fulltext/auth] {source.name} probe 실패: {result.get('reason')}")
+        return False, str(result.get("reason"))
+    logger.info(f"[fulltext/auth] {source.name} probe OK ({result.get('body_chars')}자)")
+    return True, ""
+
+
+async def _degrade(session: AsyncSession, doc: Document, reason: str) -> None:
+    """Give up on full-text promotion and continue with the RSS summary as-is.
+
+    Records status="degraded" with the reason (truncated to 300 characters) in
+    extract_meta.fulltext, enqueues the downstream stages so the article is not
+    lost, and logs a warning.
+    """
+    resolved_at = datetime.now(timezone.utc).isoformat()
+    _set_fulltext_meta(doc, status="degraded", reason=reason[:300], resolved_at=resolved_at)
+    await _enqueue_downstream(session, doc)
+    logger.warning(f"[fulltext] doc={doc.id} 격하(RSS 요약 유지): {reason}")
+
+
+async def process(document_id: int, session: AsyncSession) -> None:
+    """Promote a single article to full text.
+
+    Signature follows the queue_consumer convention (the consumer commits).
+
+    Flow: load the document -> (subscription sources only) probe gate -> fetch the
+    page -> archive the raw HTML -> extract the body -> strip footers -> paywall
+    marker check -> write the promoted fields -> enqueue downstream stages.
+    Every non-transient failure is routed through _degrade so the article still
+    proceeds on its RSS summary.
+
+    Raises:
+        ValueError: the document does not exist.
+        CrawlFetchError: propagated unchanged so the queue retries (transient error).
+    """
+    doc = await session.get(Document, document_id)
+    if not doc:
+        raise ValueError(f"문서 ID {document_id}를 찾을 수 없음")
+    if not doc.edit_url:
+        await _degrade(session, doc, "edit_url 없음")
+        return
+
+    meta = doc.extract_meta or {}
+    source_id = meta.get("source_id")
+
+    # B-3: subscription sources (auth_profile) are fetched through a Playwright
+    # session — the probe gate runs first
+    source = await session.get(NewsSource, source_id) if source_id else None
+    auth_profile = source.auth_profile if source is not None else None
+
+    if auth_profile:
+        ready, why = await _auth_session_ready(session, source)
+        if not ready:
+            await _degrade(session, doc, f"구독 세션 불가용: {why}")
+            return
+
+    try:
+        if auth_profile:
+            html_text, final_url = await fetch_page_via_browser(doc.edit_url, auth_profile)
+        else:
+            html_text, final_url = await fetch_page(doc.edit_url)
+    except (CrawlBlocked, CrawlSkip) as e:
+        # Blocked / not a target: degrade instead of retrying.
+        await _degrade(session, doc, f"{type(e).__name__}: {e}")
+        return
+    except CrawlFetchError:
+        raise  # transient error — let the queue retry
+
+    now = datetime.now(timezone.utc)
+
+    # A-7: archive the raw HTML (keeps full re-extraction possible if the extractor is replaced)
+    raw_path = _raw_html_path(source_id, doc.file_hash, now)
+    try:
+        _save_raw_html(raw_path, html_text)
+        raw_saved = True
+    except OSError as e:
+        # On a transient NAS failure only the archive is skipped and promotion
+        # continues — the reason is logged (no silent loss)
+        raw_saved = False
+        logger.error(f"[fulltext] doc={doc.id} 원본 보존 실패 (승격은 진행): {e}")
+
+    body, engine, engine_ver = _extract_body(html_text)
+    if not engine:
+        await _degrade(session, doc, f"추출 실패 (전 tier < {_WEB_MIN_BODY_LEN}자)")
+        return
+
+    # NUL characters are removed before footer stripping; the length gate is
+    # re-applied because footer removal can shrink the body below the minimum.
+    clean_body = _strip_article_footer(body.replace("\x00", ""))
+    if len(clean_body) < _WEB_MIN_BODY_LEN:
+        await _degrade(session, doc, "푸터 제거 후 본문 부족")
+        return
+
+    # B-3: the extracted body is also gated on paywall markers — blocks promoting a
+    # 'paywall notice' as the body when the session expired after the probe passed
+    # (silent corruption) and immediately demotes the probe state
+    if auth_profile:
+        from workers.news_collector import _get_or_create_health
+
+        markers = (source.selector_override or {}).get("paywall_markers", [])
+        # First non-empty marker found in the body, compared case-insensitively.
+        hit = next((m for m in markers if m and m.lower() in clean_body.lower()), None)
+        if hit:
+            health = await _get_or_create_health(session, source.id)
+            health.last_probe_ok = False
+            health.updated_at = datetime.now(timezone.utc)
+            await _degrade(session, doc, f"본문 페이월 마커 검출({hit}) — 세션 손상 의심")
+            return
+
+    # Promotion: overwrite the extracted text / markdown fields with the page body.
+    title = doc.title or ""
+    doc.extracted_text = f"{title}\n\n{clean_body}" if title else clean_body
+    doc.extracted_at = now
+    doc.extractor_version = f"rss+page@{engine}"
+    doc.md_content = clean_body
+    doc.md_status = "success"
+    doc.md_extraction_engine = engine
+    doc.md_extraction_engine_version = engine_ver
+    doc.md_format_version = "1.0"
+    doc.md_generated_at = now
+    doc.md_source_hash = hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest()
+    doc.md_content_hash = hashlib.sha256(clean_body.encode("utf-8")).hexdigest()
+    doc.md_extraction_error = None  # clear the 'not a conversion target' marker set at collection time
+    doc.content_origin = "extracted"
+    doc.file_size = len(doc.extracted_text.encode())
+    _set_fulltext_meta(
+        doc, status="promoted", engine=engine,
+        raw_html_path=str(raw_path) if raw_saved else None,
+        final_url=final_url, body_chars=len(clean_body),
+        resolved_at=now.isoformat(),
+    )
+
+    await _enqueue_downstream(session, doc)
+    logger.info(
+        f"[fulltext/{engine}] doc={doc.id} {len(clean_body)}자 승격 "
+        f"(raw={'saved' if raw_saved else 'MISSING'})"
+    )
+
+
+async def reconcile_unresolved() -> None:
+    """Safety net (author's note: run once nightly).
+
+    Finds news documents whose 'fulltext' queue row is in status "failed" and
+    that have no 'summarize' queue row at all, marks them
+    status="failed_reconciled" and enqueues the downstream stages on the RSS
+    summary. Idempotent — once summarize is enqueued the document no longer
+    matches the query. At most 200 documents are handled per call.
+    """
+    async with async_session() as session:
+        # The outer query already has ProcessingQueue in its FROM clause; without an
+        # alias, auto-correlation strips the subquery's FROM entirely ->
+        # InvalidRequestError (same pattern as queue_consumer.reset_stale_items)
+        pq = aliased(ProcessingQueue)
+        summarize_q = (
+            select(pq.id)
+            .where(
+                pq.document_id == Document.id,
+                pq.stage == "summarize",
+            )
+        )
+        result = await session.execute(
+            select(Document)
+            .join(ProcessingQueue, ProcessingQueue.document_id == Document.id)
+            .where(
+                ProcessingQueue.stage == "fulltext",
+                ProcessingQueue.status == "failed",
+                Document.source_channel == "news",
+                ~exists(summarize_q),
+            )
+            .limit(200)
+        )
+        docs = result.scalars().unique().all()
+        for doc in docs:
+            _set_fulltext_meta(doc, status="failed_reconciled")
+            await _enqueue_downstream(session, doc)
+        # Commit only when something was changed.
+        if docs:
+            await session.commit()
+            logger.warning(f"[fulltext] reconcile: 영구 실패 {len(docs)}건 RSS 요약으로 후속 enqueue")
@@ -0,0 +1,488 @@
+"""C-2 KOSHA Open API 수집 워커 (plan crawl-24x7-1).
+
+4 API (2026-06-10/06-13 실키 live 검증 + fixture 박제 — tests/fixtures/kosha_*_response.json):
+  재해사례 게시판: GET /B552468/disaster_api02/getdisaster_api02            callApiId=1060
+  재해사례 첨부:   GET /B552468/disaster_attach_api02/Disaster_attach_api02 callApiId=1070
+  KOSHA GUIDE:    GET /B552468/koshaguide/getKoshaGuide                    callApiId=1050
+  사망사고 속보:   GET /B552468/news_api02/getNews_api02                     callApiId=1040
+
+daily 스케줄 1회 (main.py):
+  재해사례 = 최근 페이지만 diff (boardno dedup) — 사례 본문 Document(텍스트 네이티브)
+            + 첨부 PDF/HWP 다운로드 → /documents/crawl_raw/kosha/{boardno}/ 저장
+            → 파일 Document + extract enqueue (kordoc HWP/PDF 기존 파이프라인 재사용).
+  사망사고 = 최근 페이지만 diff (arno dedup) — 속보 본문 Document(HTML → _clean_html).
+            첨부 API 없음·business 필드 없음. 등록일 = arno 접두 8자리(YYYYMMDD).
+  GUIDE   = 전체 레지스트리 메타 diff (1039건, 100/page = 11 call) → 신규/개정만,
+            일일 ingest cap(기본 25) = backlog 자동 점진 백필(~6주) + 부하 평탄화.
+            cap 으로 미처리 잔량은 매회 로그 (silent cap 금지).
+
+키: KOSHA_API_KEY (credentials.env) — 공공데이터포털 '인코딩' 키를 그대로 저장.
+    httpx params= 로 넘기면 % 가 재인코딩되므로 반드시 URL 문자열에 직접 결합.
+개정 감지: GUIDE dedup 키 = 규정번호+공표일자 — 같은 번호의 새 공표일자 = 신규 문서로 적재.
+"""
+
+import asyncio
+import hashlib
+import os
+import random
+import re
+from datetime import date, datetime, timezone
+from pathlib import Path
+
+import httpx
+from sqlalchemy import select
+
+from core.config import settings
+from core.crawl_politeness import CRAWL_UA
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.news_collector import (
+    FeedError,
+    _clean_html,
+    _get_or_create_health,
+    _record_failure,
+    _record_success,
+)
+
+logger = setup_logger("kosha_collector")
+
+_BASE = "https://apis.data.go.kr/B552468"
+_BOARD_EP = f"{_BASE}/disaster_api02/getdisaster_api02"
+_ATTACH_EP = f"{_BASE}/disaster_attach_api02/Disaster_attach_api02"
+_GUIDE_EP = f"{_BASE}/koshaguide/getKoshaGuide"
+_FATAL_EP = f"{_BASE}/news_api02/getNews_api02"
+
+# NewsSource names; also used in file_path / ai_sub_group of the created documents.
+_CASE_SOURCE = "KOSHA 재해사례"
+_GUIDE_SOURCE = "KOSHA GUIDE"
+_FATAL_SOURCE = "KOSHA 사망사고"
+
+_CASE_PAGES = 2          # daily diff range (30×2 = latest 60 items — API returns newest first)
+_CASE_ROWS = 30
+_FATAL_PAGES = 2         # fatal-accident bulletin daily diff (30×2 = latest 60 items — newest first)
+_FATAL_ROWS = 30
+_GUIDE_ROWS = 100
+_GUIDE_DAILY_CAP = int(os.getenv("KOSHA_GUIDE_DAILY_CAP", "25"))
+_MAX_FILE_BYTES = 50 * 1024 * 1024
+_DOWNLOAD_DELAY = (2.0, 5.0)  # portal.kosha.or.kr file server — (min, max) seconds between consecutive downloads
+
+# Safety library A-2 — licence for KOSHA material (KOGL type not yet confirmed ->
+# conservative redistribute=False, to be relaxed once evidence is available.
+# 0-3: licence metadata is injected deterministically).
+_KOSHA_LICENSE = {"scheme": "kogl", "redistribute": False, "attribution": "한국산업안전보건공단(KOSHA)"}
+
+
+def _ymd_to_date(ymd: str | None) -> date | None:
+    """Convert 'YYYYMMDD' / 'YYYY-MM-DD' text to a date.
+
+    All non-digit characters are ignored. Returns None when the input is empty,
+    does not contain exactly eight digits, or does not form a valid calendar
+    date (fail-quiet — the date is only an auxiliary axis).
+    """
+    digits = re.sub(r"\D", "", ymd or "")
+    if len(digits) == 8:
+        year, month, day = int(digits[:4]), int(digits[4:6]), int(digits[6:])
+        try:
+            return date(year, month, day)
+        except ValueError:
+            pass
+    return None
+
+
+def _api_key() -> str:
+    """Return KOSHA_API_KEY from the environment.
+
+    Raises:
+        FeedError: the variable is unset or empty.
+    """
+    if key := os.getenv("KOSHA_API_KEY", ""):
+        return key
+    raise FeedError("KOSHA_API_KEY 미설정 — KOSHA 수집 불가")
+
+
+async def _api_get(url: str) -> dict:
+    """GET a KOSHA API URL and return the decoded JSON payload.
+
+    Checks both error layers: the HTTP status / non-JSON body (gateway) and the
+    header.resultCode field (provider).
+
+    Raises:
+        FeedError: non-200 status, a body that is not JSON, or resultCode != "00".
+    """
+    async with httpx.AsyncClient(timeout=25) as client:
+        resp = await client.get(url, headers={"User-Agent": CRAWL_UA})
+    if resp.status_code != 200:
+        # Only the part before '?' is reported, keeping the query string out of the message.
+        raise FeedError(f"KOSHA API {resp.status_code} @ {url.split('?')[0]}")
+    try:
+        payload = resp.json()
+    except ValueError as e:
+        # Gateway errors arrive as XML/plain text (SERVICE_KEY_IS_NOT_REGISTERED etc.)
+        raise FeedError(f"KOSHA API 비-JSON 응답: {resp.text[:120]}") from e
+    header = payload.get("header") or {}
+    code = header.get("resultCode")
+    if code != "00":
+        raise FeedError(f"KOSHA API resultCode={code}: {header.get('resultMsg')}")
+    return payload
+
+
+def _items(payload: dict) -> list[dict]:
+    """Return body.items.item as a list.
+
+    Guards against the data.go.kr habit of returning a bare dict for a single
+    result; missing or falsy intermediate levels yield an empty list.
+    """
+    body = payload.get("body") or {}
+    container = body.get("items") or {}
+    entry = container.get("item")
+    if isinstance(entry, dict):
+        return [entry]
+    return [] if entry is None else list(entry)
+
+
+def _fatal_fields(item: dict) -> dict | None:
+    """Map a fatal-accident item (arno / keyword / contents) to Document fields.
+
+    Pure function (no httpx/DB — intended for fixture unit tests). arno and
+    keyword are required; if either is missing or blank the item is skipped
+    (None). The API has no dedicated date field, so dates are derived from the
+    arno prefix (author's note: arno = 'YYYYMMDDHHMMSS' + 6 arbitrary characters):
+    the first 8 characters give published_date, and the first 14 are kept
+    verbatim as reg_dt when they are all digits (no timezone interpretation).
+    """
+    arno = str(item.get("arno") or "").strip()
+    title = (item.get("keyword") or "").strip()
+    if not (arno and title):
+        return None
+    stamp = arno[:14]
+    return {
+        "arno": arno,
+        "title": title,
+        "text": _clean_html(item.get("contents") or "", max_len=None),
+        "published_date": _ymd_to_date(arno[:8]),
+        "reg_dt": stamp if re.fullmatch(r"\d{14}", stamp) else None,
+    }
+
+
+def _safe_filename(name: str) -> str:
+    """Sanitise an externally supplied file name for use as a single NAS path component.
+
+    Path separators and control characters become "_", whitespace runs collapse
+    to one space, and the result is capped at 140 characters. An empty result
+    becomes "unnamed". The special names "." and ".." are rewritten to "_" /
+    "__": joined onto a directory they would otherwise resolve to that directory
+    or its parent instead of a file.
+    """
+    name = re.sub(r"[/\\\x00-\x1f]", "_", name).strip()
+    name = re.sub(r"\s+", " ", name)
+    name = name[:140]
+    if name in {".", ".."}:
+        name = name.replace(".", "_")
+    return name or "unnamed"
+
+
+async def _download(url: str, dest: Path) -> int:
+    """Download an attachment / guide file to dest and return its size in bytes.
+
+    Sleeps a random interval from _DOWNLOAD_DELAY first (spacing between
+    consecutive downloads). The body is streamed and the transfer is aborted as
+    soon as more than _MAX_FILE_BYTES have been received, so an oversized file
+    is never held in memory in full. Nothing is written to disk unless the
+    whole body arrived within the cap; parent directories are created as needed.
+
+    Raises:
+        FeedError: non-200 status, or the body exceeds _MAX_FILE_BYTES (the
+            reported byte count is what had been received when the transfer
+            was aborted).
+    """
+    await asyncio.sleep(random.uniform(*_DOWNLOAD_DELAY))
+    received = bytearray()
+    async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
+        async with client.stream("GET", url, headers={"User-Agent": CRAWL_UA}) as resp:
+            if resp.status_code != 200:
+                raise FeedError(f"파일 다운로드 {resp.status_code}: {url}")
+            async for chunk in resp.aiter_bytes():
+                received.extend(chunk)
+                if len(received) > _MAX_FILE_BYTES:
+                    raise FeedError(f"파일 크기 초과 ({len(received)} bytes): {url}")
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_bytes(received)
+    return len(received)
+
+
+async def _get_or_create_source(session, name: str, feed_url: str) -> NewsSource:
+    """Return the NewsSource row named name, creating and flushing it if absent.
+
+    New rows are created with enabled=False (author's note: not part of the 6 h
+    news cycle — this worker polls daily).
+    """
+    lookup = await session.execute(select(NewsSource).where(NewsSource.name == name))
+    found = lookup.scalars().first()
+    if found is not None:
+        return found
+    created = NewsSource(
+        name=name, feed_url=feed_url, feed_type="rss", fetch_method="api",
+        fulltext_policy="none", source_channel="crawl", category="Safety",
+        language="ko", country="KR",
+        enabled=False,
+    )
+    session.add(created)
+    await session.flush()
+    return created
+
+
+async def _ingest_attachment(session, boardno: str, filenm: str, filepath: str) -> bool:
+    """Ingest one attachment: save to NAS, create a file Document, enqueue extract.
+
+    Dedup key is the relative path crawl_raw/kosha/<boardno>/<sanitised name>;
+    when a Document with that file_path already exists nothing is downloaded.
+
+    Returns:
+        True when a new Document was created, False when it already existed.
+
+    Raises:
+        FeedError: propagated from _download (bad status / size cap).
+    """
+    safe = _safe_filename(filenm)
+    # NOTE(review): boardno is interpolated into the path unsanitised — presumably
+    # always a plain identifier from the API; confirm.
+    rel_path = f"crawl_raw/kosha/{boardno}/{safe}"
+    existing = await session.execute(
+        select(Document).where(Document.file_path == rel_path).limit(1)
+    )
+    if existing.scalars().first():
+        return False
+
+    dest = Path(settings.nas_mount_path) / rel_path
+    size = await _download(filepath, dest)
+    # Extension = text after the last dot, lower-cased, capped at 10 chars; "bin" when there is no dot.
+    ext = (safe.rsplit(".", 1)[-1].lower() if "." in safe else "bin")[:10]
+
+    doc = Document(
+        file_path=rel_path,
+        file_hash=hashlib.sha256(dest.read_bytes()).hexdigest(),
+        file_format=ext,
+        file_size=size,
+        file_type="immutable",
+        title=safe.rsplit(".", 1)[0],
+        source_channel="crawl",
+        data_origin="external",
+        import_source="kosha_api",
+        edit_url=filepath,
+        ai_tags=["Safety/KOSHA재해사례/첨부"],
+        # Safety library A-2 — deterministic at ingest time (no LLM dependence even via classify)
+        material_type="incident",
+        jurisdiction="KR",
+        extract_meta={"kosha": {"boardno": boardno, "kind": "case_attachment"},
+                      "license": dict(_KOSHA_LICENSE)},
+    )
+    session.add(doc)
+    await session.flush()
+    # extract -> (crawl override) classify -> embed/chunk — reuses the existing file pipeline
+    await enqueue_stage(session, doc.id, "extract")
+    logger.info(f"[kosha] 첨부 ingest: {rel_path} ({size} bytes)")
+    return True
+
+
+async def collect_disaster_cases(session) -> int:
+    """Daily diff of disaster cases — latest _CASE_PAGES pages, deduplicated by boardno.
+
+    Each new case becomes a text-native Document (summarize/embed/chunk are
+    enqueued); when the case reports attachments they are fetched via the
+    attachment API and ingested as file Documents.
+
+    Returns:
+        Number of new case Documents created (attachments are not counted).
+
+    Raises:
+        FeedError: missing API key or a failing board/attachment API call. Only
+            per-attachment download failures are caught and skipped.
+    """
+    key = _api_key()
+    source = await _get_or_create_source(session, _CASE_SOURCE, _BOARD_EP)
+    new_count = 0
+
+    for page in range(1, _CASE_PAGES + 1):
+        # The key is concatenated into the URL string directly rather than passed as params.
+        payload = await _api_get(
+            f"{_BOARD_EP}?serviceKey={key}&callApiId=1060&pageNo={page}&numOfRows={_CASE_ROWS}"
+        )
+        items = _items(payload)
+        if not items:
+            break
+        page_all_dup = True
+        for item in items:
+            boardno = str(item.get("boardno") or "").strip()
+            title = (item.get("keyword") or "").strip()
+            if not boardno or not title:
+                continue
+            # Synthetic dedup hash (32 hex chars) derived from the board number, not from file content.
+            fhash = hashlib.sha256(f"kosha-case|{boardno}".encode()).hexdigest()[:32]
+            existing = await session.execute(
+                select(Document).where(Document.file_hash == fhash).limit(1)
+            )
+            if existing.scalars().first():
+                continue
+            page_all_dup = False
+
+            contents = (item.get("contents") or "").strip()
+            business = (item.get("business") or "").strip()
+            now = datetime.now(timezone.utc)
+            doc = Document(
+                file_path=f"crawl/{_CASE_SOURCE}/{boardno}",
+                file_hash=fhash,
+                file_format="article",
+                file_size=len(contents.encode()),
+                file_type="note",
+                title=title,
+                extracted_text=f"{title}\n\n[{business}]\n{contents}",
+                extracted_at=now,
+                extractor_version="kosha_api",
+                md_status="skipped",
+                md_extraction_error="kosha case: 텍스트 네이티브, markdown 변환 비대상",
+                source_channel="crawl",
+                data_origin="external",
+                review_status="approved",
+                ai_domain="Safety",
+                ai_sub_group=_CASE_SOURCE,
+                ai_tags=[f"Safety/KOSHA재해사례/{business or '기타'}"],
+                # Safety library A-2 — deterministic at ingest time (classify-skip path)
+                material_type="incident",
+                jurisdiction="KR",
+                extract_meta={
+                    "source_id": source.id,
+                    "source_name": _CASE_SOURCE,
+                    "published_at": None,
+                    "kosha": {"boardno": boardno, "business": business,
+                              "atcflcnt": item.get("atcflcnt")},
+                    "license": dict(_KOSHA_LICENSE),
+                },
+            )
+            session.add(doc)
+            await session.flush()
+            await enqueue_stage(session, doc.id, "summarize")
+            await enqueue_stage(session, doc.id, "embed")
+            await enqueue_stage(session, doc.id, "chunk")
+            new_count += 1
+
+            # Attachments (PDF/HWP) — the formal case report, richer than the board text.
+            # NOTE(review): attachments are only fetched for cases that are new in this
+            # run, so a download that fails here is not retried once the case itself is
+            # a duplicate — confirm this is intended.
+            if int(item.get("atcflcnt") or 0) > 0:
+                attach = await _api_get(
+                    f"{_ATTACH_EP}?serviceKey={key}&callApiId=1070"
+                    f"&pageNo=1&numOfRows=10&boardno={boardno}"
+                )
+                for att in _items(attach):
+                    filenm = (att.get("filenm") or "").strip()
+                    filepath = (att.get("filepath") or "").strip()
+                    if not filenm or not filepath.startswith("https://"):
+                        continue
+                    try:
+                        await _ingest_attachment(session, boardno, filenm, filepath)
+                    except FeedError as e:
+                        logger.warning(f"[kosha] 첨부 실패 skip ({boardno}/{filenm}): {e}")
+        if page_all_dup:
+            break  # newest-first order — if the whole page is already known, later pages are too
+
+    logger.info(f"[kosha] 재해사례 신규 {new_count}건")
+    return new_count
+
+
+async def collect_fatal_accidents(session) -> int:
+    """Daily diff of fatal-accident bulletins — latest _FATAL_PAGES pages, deduplicated by arno.
+
+    A separate channel (1040) from disaster cases (1060): no business field, no
+    attachment API, and contents is HTML.
+    The body is text-native (via _clean_html) -> not a markdown conversion
+    target; summarize/embed/chunk are enqueued.
+
+    Returns:
+        Number of new Documents created.
+
+    Raises:
+        FeedError: missing API key or a failing API call.
+    """
+    key = _api_key()
+    source = await _get_or_create_source(session, _FATAL_SOURCE, _FATAL_EP)
+    new_count = 0
+
+    for page in range(1, _FATAL_PAGES + 1):
+        payload = await _api_get(
+            f"{_FATAL_EP}?serviceKey={key}&callApiId=1040&pageNo={page}&numOfRows={_FATAL_ROWS}"
+        )
+        items = _items(payload)
+        if not items:
+            break
+        page_all_dup = True
+        for item in items:
+            fields = _fatal_fields(item)
+            if fields is None:
+                continue  # required arno/keyword missing
+            arno = fields["arno"]
+            # Synthetic dedup hash (32 hex chars) derived from arno, not from content.
+            fhash = hashlib.sha256(f"kosha-fatal|{arno}".encode()).hexdigest()[:32]
+            existing = await session.execute(
+                select(Document).where(Document.file_hash == fhash).limit(1)
+            )
+            if existing.scalars().first():
+                continue
+            page_all_dup = False
+
+            text = fields["text"]
+            now = datetime.now(timezone.utc)
+            doc = Document(
+                file_path=f"crawl/{_FATAL_SOURCE}/{arno}",
+                file_hash=fhash,
+                file_format="article",
+                file_size=len(text.encode()),
+                file_type="note",
+                title=fields["title"],
+                extracted_text=f"{fields['title']}\n\n{text}",
+                extracted_at=now,
+                extractor_version="kosha_api",
+                md_status="skipped",
+                md_extraction_error="kosha fatal: 텍스트 네이티브, markdown 변환 비대상",
+                source_channel="crawl",
+                data_origin="external",
+                review_status="approved",
+                ai_domain="Safety",
+                ai_sub_group=_FATAL_SOURCE,
+                ai_tags=["Safety/KOSHA사망사고"],
+                # Safety library A-2 — deterministic at ingest time (classify-skip path)
+                material_type="incident",
+                jurisdiction="KR",
+                published_date=fields["published_date"],
+                extract_meta={
+                    "source_id": source.id,
+                    "source_name": _FATAL_SOURCE,
+                    "published_at": None,
+                    "kosha": {"arno": arno, "kind": "fatal_accident",
+                              "reg_dt": fields["reg_dt"]},
+                    "license": dict(_KOSHA_LICENSE),
+                },
+            )
+            session.add(doc)
+            await session.flush()
+            await enqueue_stage(session, doc.id, "summarize")
+            await enqueue_stage(session, doc.id, "embed")
+            await enqueue_stage(session, doc.id, "chunk")
+            new_count += 1
+        if page_all_dup:
+            break  # newest-first order — if the whole page is already known, later pages are too
+
+    logger.info(f"[kosha] 사망사고 신규 {new_count}건")
+    return new_count
+
+
+async def collect_kosha_guide(session, cap: int = _GUIDE_DAILY_CAP) -> int:
+    """Diff the whole GUIDE registry and download only new/revised entries.
+
+    Phase 1 pages through the registry metadata and collects every entry whose
+    dedup hash (guide number + promulgation date) has no Document yet. Phase 2
+    downloads at most cap of them; the remainder is picked up by later runs
+    (gradual backfill) and is always reported in the log.
+
+    Args:
+        session: async DB session; the caller commits.
+        cap: maximum number of entries to download in this run.
+
+    Returns:
+        Number of Documents created in this run.
+
+    Raises:
+        FeedError: missing API key or a failing registry API call. Individual
+            download failures are logged and skipped.
+    """
+    key = _api_key()
+    await _get_or_create_source(session, _GUIDE_SOURCE, _GUIDE_EP)
+    new_specs: list[dict] = []
+    # Nothing is inserted during the scan, so the DB lookup alone cannot catch an
+    # entry that shows up twice in the registry listing; dedup within the run too,
+    # otherwise two Documents with the same file_hash/file_path would be created.
+    seen_hashes: set[str] = set()
+    page, total = 1, None
+
+    while True:
+        payload = await _api_get(
+            f"{_GUIDE_EP}?serviceKey={key}&callApiId=1050&pageNo={page}&numOfRows={_GUIDE_ROWS}"
+        )
+        if total is None:
+            total = int((payload.get("body") or {}).get("totalCount") or 0)
+        items = _items(payload)
+        if not items:
+            break
+        for item in items:
+            no = (item.get("techGdlnNo") or "").strip()
+            ymd = (item.get("techGdlnOfancYmd") or "").strip()
+            url = (item.get("fileDownloadUrl") or "").strip()
+            if not no or not url.startswith("https://"):
+                continue
+            # Revision detection: same number with a new promulgation date = new hash.
+            fhash = hashlib.sha256(f"kosha-guide|{no}|{ymd}".encode()).hexdigest()[:32]
+            if fhash in seen_hashes:
+                continue
+            seen_hashes.add(fhash)
+            existing = await session.execute(
+                select(Document).where(Document.file_hash == fhash).limit(1)
+            )
+            if not existing.scalars().first():
+                new_specs.append({"no": no, "ymd": ymd, "url": url,
+                                  "name": (item.get("techGdlnNm") or no).strip(),
+                                  "fhash": fhash})
+        if page * _GUIDE_ROWS >= total:
+            break
+        page += 1
+
+    todo, deferred = new_specs[:cap], len(new_specs) - min(len(new_specs), cap)
+    ingested = 0
+    for spec in todo:
+        safe_no = _safe_filename(spec["no"])
+        rel_path = f"crawl_raw/kosha_guide/{safe_no}-{spec['ymd'] or 'nodate'}.pdf"
+        dest = Path(settings.nas_mount_path) / rel_path
+        try:
+            size = await _download(spec["url"], dest)
+        except FeedError as e:
+            logger.warning(f"[kosha] GUIDE 다운로드 실패 skip ({spec['no']}): {e}")
+            continue
+        doc = Document(
+            file_path=rel_path,
+            file_hash=spec["fhash"],
+            file_format="pdf",
+            file_size=size,
+            file_type="immutable",
+            title=f"{spec['name']} ({spec['no']})",
+            source_channel="crawl",
+            data_origin="external",
+            import_source="kosha_api",
+            edit_url=spec["url"],
+            ai_tags=["Safety/KOSHA GUIDE"],
+            # Safety library A-2 — GUIDE = non-binding advisory technical guideline
+            # (not law, plan 0-1)
+            material_type="guide",
+            jurisdiction="KR",
+            published_date=_ymd_to_date(spec["ymd"]),
+            extract_meta={"kosha": {"kind": "guide", "techGdlnNo": spec["no"],
+                                    "ofancYmd": spec["ymd"]},
+                          "license": dict(_KOSHA_LICENSE)},
+        )
+        session.add(doc)
+        await session.flush()
+        await enqueue_stage(session, doc.id, "extract")
+        ingested += 1
+
+    # No silent cap — make the remainder visible (gradual backfill: the next run
+    # consumes another cap's worth)
+    logger.info(f"[kosha] GUIDE 신규/개정 {len(new_specs)}건 중 {ingested}건 ingest"
+                + (f" (cap {cap}, 잔여 {deferred}건 — 일일 점진 백필)" if deferred > 0 else ""))
+    return ingested
+
+
+async def run() -> None:
+    """Run all KOSHA collectors once (daily), isolating failures per source.
+
+    Each collector gets its own session, so a failure in one (e.g. disaster
+    cases) does not block the others (e.g. GUIDE). On success the health row
+    records the new-item count and the session is committed; on failure the
+    partial load is rolled back and only the failure is recorded.
+    """
+    now = datetime.now(timezone.utc)
+    for name, collector in ((_CASE_SOURCE, collect_disaster_cases),
+                            (_FATAL_SOURCE, collect_fatal_accidents),
+                            (_GUIDE_SOURCE, collect_kosha_guide)):
+        async with async_session() as session:
+            result = await session.execute(select(NewsSource).where(NewsSource.name == name))
+            source = result.scalars().first()
+            # Capture the primary key up front: rollback() expires loaded ORM
+            # instances, and reading source.id afterwards would need an implicit
+            # refresh, which AsyncSession does not allow on plain attribute access.
+            source_id = source.id if source is not None else None
+            try:
+                count = await collector(session)
+                if source_id is None:  # first run — the collector created the source
+                    result = await session.execute(
+                        select(NewsSource).where(NewsSource.name == name))
+                    source = result.scalars().first()
+                    source_id = source.id
+                health = await _get_or_create_health(session, source_id)
+                _record_success(health, count, False, now)
+                await session.commit()
+            except Exception as e:
+                logger.error(f"[kosha] {name} 수집 실패: {e}")
+                await session.rollback()  # discard the partial load, then record health only
+                # A source created inside the rolled-back transaction no longer
+                # exists, so there is nothing to attach the failure to.
+                if source is not None and source_id is not None:
+                    health = await _get_or_create_health(session, source_id)
+                    _record_failure(health, str(e) or repr(e), now)
+                    await session.commit()
+
+
+if __name__ == "__main__":
+    asyncio.run(run())
@@ -6,7 +6,7 @@

 import os
 import re
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
 from pathlib import Path
 from xml.etree import ElementTree as ET

@@ -15,7 +15,7 @@ from sqlalchemy import select

 from core.config import settings
 from core.database import async_session
-from core.utils import create_caldav_todo, escape_ical_text, file_hash, send_smtp_email, setup_logger
+from core.utils import create_caldav_todo, file_hash, setup_logger
 from models.automation import AutomationState
 from models.document import Document
 from models.queue import enqueue_stage
@@ -262,6 +262,16 @@ async def _save_law_split(
                f"개정구분: {revision_type}"
            )

+        # 안전 자료실 A-2 — 공포일 파싱 (law published_date = COALESCE(시행일, 공포일) 계약,
+        # 본 레거시 워커는 공포일만 보유 — 시행일 기반 버전 체인은 B-1 statute_collector 소관)
+        _digits = re.sub(r"\D", "", str(proclamation_date or ""))
+        pub_date = None
+        if len(_digits) == 8:
+            try:
+                pub_date = date(int(_digits[:4]), int(_digits[4:6]), int(_digits[6:8]))
+            except ValueError:
+                pub_date = None
+
        doc = Document(
            file_path=rel_path,
            file_hash=file_hash(file_path),
@@ -272,6 +282,13 @@ async def _save_law_split(
            source_channel="law_monitor",
            data_origin="work",
            category="law",
+            # 안전 자료실 A-2 — ingest 시점 deterministic. 법령 텍스트 = 저작권법 제7조
+            # 비보호 저작물 (public domain). 본 워커는 휴면(LAW_OC 미설정)이나 코드 경로 유지.
+            material_type="law",
+            jurisdiction="KR",
+            published_date=pub_date,
+            extract_meta={"license": {"scheme": "public_domain", "redistribute": True,
+                                      "attribution": "국가법령정보센터"}},
            user_note=note or None,
        )
        session.add(doc)
@@ -337,8 +354,7 @@ def _safe_name(name: str) -> str:


 def _send_notifications(law_name: str, proclamation_date: str, revision_type: str):
-    """CalDAV + SMTP 알림"""
-    # CalDAV
+    """CalDAV 할일 알림 (SMTP 발송은 2026-06-10 폐기 — CalDAV 가 단일 알림 채널)"""
    caldav_url = os.getenv("CALDAV_URL", "")
    caldav_user = os.getenv("CALDAV_USER", "")
    caldav_pass = os.getenv("CALDAV_PASS", "")
@@ -349,15 +365,3 @@ def _send_notifications(law_name: str, proclamation_date: str, revision_type: st
            description=f"공포일자: {proclamation_date}, 개정구분: {revision_type}",
            due_days=7,
        )
-
-    # SMTP
-    smtp_host = os.getenv("MAILPLUS_HOST", "")
-    smtp_port = int(os.getenv("MAILPLUS_SMTP_PORT", "465"))
-    smtp_user = os.getenv("MAILPLUS_USER", "")
-    smtp_pass = os.getenv("MAILPLUS_PASS", "")
-    if smtp_host and smtp_user:
-        send_smtp_email(
-            smtp_host, smtp_port, smtp_user, smtp_pass,
-            subject=f"[법령 변경] {law_name} ({revision_type})",
-            body=f"법령명: {law_name}\n공포일자: {proclamation_date}\n개정구분: {revision_type}",
-        )
@@ -17,7 +17,7 @@ from sqlalchemy import select

 from core.config import settings
 from core.database import async_session
-from core.utils import file_hash, send_smtp_email, setup_logger
+from core.utils import file_hash, setup_logger
 from models.automation import AutomationState
 from models.document import Document
 from models.queue import enqueue_stage
@@ -201,11 +201,4 @@ async def run():

        await session.commit()

-        # SMTP 알림
-        smtp_host = os.getenv("MAILPLUS_HOST", "")
-        smtp_port = int(os.getenv("MAILPLUS_SMTP_PORT", "465"))
-        if archived and smtp_host:
-            body = f"이메일 {len(archived)}건 수집 완료:\n\n" + "\n".join(f"- {s}" for s in archived)
-            send_smtp_email(smtp_host, smtp_port, user, password, "PKM 이메일 수집 알림", body)
-
        logger.info(f"이메일 {len(archived)}건 수집 완료 (max_uid={max_uid})")
@@ -394,13 +394,29 @@ async def _process_office(
    partial arm 은 PDF split 전용 — office 는 이진이라 여기 없음. 'completed' 는 A-3 직렬화 전용(워커 미사용).
    quality 는 content-type-aware: office=scored(_compute_quality). 동기 변환은 to_thread 로 event loop 비차단.
    """
-    from workers.office_md import OfficeMdError, convert_office_to_md
+    from workers.office_md import (
+        OfficeMdError,
+        convert_hwp_to_md_and_images,
+        convert_office_to_md,
+    )

-    is_hwp = Path(container_path).suffix.lower() in (".hwp", ".hwpx")
-    engine = "libreoffice_hwp" if is_hwp else "markitdown"
+    suffix = Path(container_path).suffix.lower()
+    if suffix == ".hwp":
+        engine = "pyhwp"  # HWP5 binary: libhwplo 못 읽어 pyhwp 로 교체(2026-06-09)
+    elif suffix == ".hwpx":
+        engine = "libreoffice_hwp"  # HWPX 는 pyhwp 미지원 → LibreOffice 폴백
+    else:
+        engine = "markitdown"
+
+    hwp_images: list[dict[str, Any]] = []
    try:
-        # 동기 subprocess(LibreOffice)/markitdown — 스레드로 빼서 이벤트 루프 비차단.
-        md_content = await asyncio.to_thread(convert_office_to_md, container_path)
+        # 동기 subprocess/markitdown — 스레드로 빼서 이벤트 루프 비차단.
+        if suffix == ".hwp":
+            md_content, hwp_images = await asyncio.to_thread(
+                convert_hwp_to_md_and_images, container_path
+            )
+        else:
+            md_content = await asyncio.to_thread(convert_office_to_md, container_path)
    except OfficeMdError as exc:
        logger.warning(f"[marker] office md 변환 실패 id={document_id} engine={engine}: {exc}")
        await _fail(session, document_id, f"office_md: {str(exc)[:990]}", engine=engine)
@@ -410,8 +426,49 @@ async def _process_office(
        await _fail(session, document_id, f"office_md_unexpected: {str(exc)[:980]}", engine=engine)
        return

+    # ---- 이미지 NAS persist (.hwp 전용) ----
+    # hwp5html 은 bindata raster 를 추출하나 본문 xhtml 에 <img> 앵커가 없어(orphan, --css/--html
+    # 동일) 인라인 위치 복원 불가 → marker(PDF) 의 _persist_images_to_nas 로 NAS 영속 후 md 말미
+    # 갤러리로 부착(docimg: ref = 뷰어 해석). OLE 수식/도형은 앵커도 raster 도 아니라 제외.
+    # docx/xlsx/pptx/hwpx 는 이미지 미처리(기존 동작 유지).
+    saved_images: list[dict[str, Any]] = []
+    orphan_paths: list[str] = []
+    if suffix == ".hwp" and MARKDOWN_IMAGE_PERSIST:
+        if hwp_images:
+            images_resp = [
+                {
+                    "bytes_b64": base64.b64encode(im["data"]).decode("ascii"),
+                    "format": im.get("format") or "png",
+                    "slug": "",
+                    "width": None,
+                    "height": None,
+                }
+                for im in hwp_images
+            ]
+            try:
+                saved_images = _persist_images_to_nas(document_id, images_resp)
+            except OSError as exc:
+                # NAS 일시 끊김 등 — transient. queue retry 로 복구.
+                logger.warning(
+                    f"[marker] hwp image persist NAS write failed id={document_id}: "
+                    f"{type(exc).__name__}: {exc}"
+                )
+                raise
+            if saved_images:
+                gallery = "\n\n## 첨부 이미지\n\n" + "\n\n".join(
+                    f"![](docimg:{img['image_key']})" for img in saved_images
+                )
+                md_content = md_content + gallery
+        # 재변환 시 현재 saved_images 기준으로 과거 document_images row/NAS 파일 정리.
+        orphan_paths = await _sync_document_images(
+            session, document_id, saved_images, {"engine": engine}
+        )
+
    # 성공 — 계약상 md_content 는 비공백(빈출력은 raise). quality scored.
    quality = _compute_quality(md_content, doc.extracted_text or "", {"page_count": None})
+    if saved_images:
+        quality.setdefault("warnings", []).append(f"hwp_images_appended:{len(saved_images)}")
+
    await session.execute(
        update(Document).where(Document.id == document_id).values(
            md_content=md_content,
@@ -429,7 +486,21 @@ async def _process_office(
        )
    )
    await session.commit()
-    logger.info(f"[marker] office success id={document_id} engine={engine} len={len(md_content)}")
+
+    # commit 후 고아 NAS 파일 unlink (best-effort, 실패해도 DB 정합 유지).
+    for orphan_path in orphan_paths:
+        try:
+            Path(orphan_path).unlink(missing_ok=True)
+        except Exception as exc:
+            logger.warning(
+                f"[marker] orphan image unlink failed id={document_id} path={orphan_path}: "
+                f"{type(exc).__name__}: {exc}"
+            )
+
+    logger.info(
+        f"[marker] office success id={document_id} engine={engine} "
+        f"len={len(md_content)} images={len(saved_images)}"
+    )


 async def _process_split(
@@ -1,20 +1,30 @@
-"""뉴스 수집 워커 — RSS/API에서 기사 수집, documents에 저장"""
+"""뉴스 수집 워커 — RSS/API에서 기사 수집, documents에 저장

+plan crawl-24x7-1 A그룹 (2026-06-10):
+  A-1 조건부 GET(ETag/Last-Modified 그대로 재전송) + 콘텐츠 해시 변경감지
+  A-2 fulltext_policy='page' 소스는 'fulltext' stage 로 본문 승격 위임
+  A-5 source_health 기록 + circuit breaker (소스별 실패 격리)
+  A-6 first-wins + 포털 전재 2차 dedup (제목+최근 3일, 12자 이상 제목 한정)
+"""
+
+import asyncio
 import hashlib
 import re
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from html import unescape
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

 import feedparser
 import httpx
 from sqlalchemy import select

+from core.crawl_politeness import CRAWL_UA
 from core.database import async_session
 from core.utils import setup_logger
 from models.document import Document
 from models.news_source import NewsSource
 from models.queue import enqueue_stage
+from models.source_health import SourceHealth

 logger = setup_logger("news_collector")

@@ -26,6 +36,7 @@ CATEGORY_MAP = {
    "환경": "Environment", "기술": "Technology",
    # 영어
    "World": "International", "International": "International",
+    "World news": "International",  # Guardian sectionName (B-2)
    "Technology": "Technology", "Tech": "Technology", "Sci-Tech": "Technology",
    "Arts": "Culture", "Culture": "Culture",
    "Climate": "Environment", "Environment": "Environment",
@@ -35,27 +46,50 @@ CATEGORY_MAP = {
    "Kultur": "Culture", "Wissenschaft": "Technology",
    # 프랑스어
    "Environnement": "Environment",
+    # 도메인 채널 (source_channel='crawl', 0-5 (a)) — 양쪽 공통 맵
+    "안전": "Safety", "Safety": "Safety",
+    "공학": "Engineering", "Engineering": "Engineering",
+    "철학": "Philosophy", "Philosophy": "Philosophy",
 }


+class FeedError(Exception):
+    """Per-source fetch/parse failure — run() records it as a source_health failure."""
+
+
 def _normalize_category(raw: str) -> str:
     """Map a raw category label to its canonical name; unknown labels become "Other"."""
     return CATEGORY_MAP.get(raw, CATEGORY_MAP.get(raw.strip(), "Other"))


-def _clean_html(text: str) -> str:
-    """HTML 태그 제거 + 정제"""
+def _clean_html(text: str, max_len: int | None = 1000) -> str:
+    """Strip HTML tags, unescape entities and trim whitespace.
+
+    The result is cut to max_len characters; max_len=None disables the cut
+    (used for feed-full full text).
+    """
     if not text:
         return ""
     text = re.sub(r"<[^>]+>", "", text)
     text = unescape(text)
-    return text.strip()[:1000]
+    text = text.strip()
+    return text if max_len is None else text[:max_len]
+
+
+# tracking 파라미터 판별 — prefix(utm_/at_=BBC/ns_=BBC/mc_=mailchimp) + 단독 키
+_TRACKING_PREFIXES = ("utm_", "at_", "ns_", "mc_")
+_TRACKING_PARAMS = {"fbclid", "gclid", "igshid", "ref", "smid", "partner", "cmp", "ocid", "ftag"}


 def _normalize_url(url: str) -> str:
-    """URL 정규화 (tracking params 제거)"""
+    """Normalize a URL: drop only tracking parameters, keep content-identifying ones.
+
+    Never strip the whole query: on sites that identify content by query string
+    (hada.io/topic?id=, aitimes articleView.html?idxno=, HN item?id=) distinct
+    articles would collapse into the same URL. Storage (edit_url) and lookup
+    must both go through this function for dedup to hold. The fragment is
+    always discarded.
+    """
     parsed = urlparse(url)
-    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", ""))
+    kept = [
+        (k, v) for k, v in parse_qsl(parsed.query, keep_blank_values=True)
+        if not (k.lower().startswith(_TRACKING_PREFIXES) or k.lower() in _TRACKING_PARAMS)
+    ]
+    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", urlencode(kept), ""))


 def _article_hash(title: str, published: str, source_name: str) -> str:
@@ -73,8 +107,104 @@ def _normalize_to_utc(dt) -> datetime:
    return datetime.now(timezone.utc)


+# ── A-5: circuit breaker 정책 ──
+# 연속 실패 >= OPEN 임계 → open (재시도 간격 지수 확대, 6h × 2^n, cap 48h)
+# 연속 실패 >  DISABLE 임계 → disabled (수집 제외 + 가시 로그, 수동 복구 대상)
+# news_sources.enabled 는 건드리지 않는다 — 사용자 의도(enabled)와 자동 상태(circuit) 분리.
+_CIRCUIT_OPEN_AFTER = 3
+_CIRCUIT_DISABLE_AFTER = 10
+_BACKOFF_BASE_HOURS = 6
+_BACKOFF_CAP_HOURS = 48
+_EMPTY_STREAK_ALERT = 8  # 6h 사이클 × 8 = 약 2일 연속 빈 피드 → 가시 경고
+
+
+def _should_attempt(health: SourceHealth, now: datetime) -> bool:
+    """Decide from the circuit state whether this cycle should fetch the source.
+
+    A disabled circuit never fetches. An open circuit with a recorded error
+    time fetches only once its backoff window has elapsed; the window doubles
+    with every failure beyond the open threshold and is capped. Any other
+    state fetches.
+
+    Caution (B-3 contract ②, r5): a future consumer of the relogin_requested
+    flag must be placed *before* the open-skip branch below — if "open" ever
+    turns into a schedule exclusion, the batch boundary never arrives and the
+    flag is never consumed (a dead half-open button).
+    """
+    state = health.circuit_state
+    if state == "disabled":
+        return False
+    if state != "open" or health.last_error_at is None:
+        return True
+    # Failures beyond the open threshold drive the exponential backoff.
+    excess = max(health.consecutive_failures - _CIRCUIT_OPEN_AFTER, 0)
+    wait_hours = min(_BACKOFF_BASE_HOURS * (2 ** excess), _BACKOFF_CAP_HOURS)
+    return now - health.last_error_at >= timedelta(hours=wait_hours)
+
+
+def _record_success(health: SourceHealth, items: int, not_modified: bool, now: datetime) -> None:
+    """Record a successful fetch on the health row and close the circuit.
+
+    Resets the consecutive-failure counter, bumps the fetch total and stores
+    the item count and timestamps. A circuit in any state other than "closed"
+    is moved back to "closed" (logged). The empty-feed streak is only touched
+    for real responses: a not_modified fetch leaves it alone, zero items
+    extend it (with an error log once the alert threshold is reached), and
+    any items reset it.
+    """
+    health.consecutive_failures = 0
+    health.total_fetches += 1
+    health.last_success_at = now
+    health.last_fetch_items = items
+    health.updated_at = now
+
+    if health.circuit_state != "closed":
+        # Log first: the message reports the state the circuit is leaving.
+        logger.info(f"[health] source={health.source_id} circuit {health.circuit_state}→closed")
+        health.circuit_state = "closed"
+        health.circuit_opened_at = None
+
+    # 304 / identical-hash responses are a healthy signal and are not counted;
+    # only a real response with zero entries counts (feed-rot watch).
+    if not not_modified:
+        if items == 0:
+            health.empty_streak += 1
+            if health.empty_streak >= _EMPTY_STREAK_ALERT:
+                logger.error(
+                    f"[health] source={health.source_id} 빈 피드 {health.empty_streak}회 연속 "
+                    f"— 피드 부패 의심 (RSSHub 류 라우트 깨짐 패턴)"
+                )
+        else:
+            health.empty_streak = 0
+
+
+def _record_failure(health: SourceHealth, error: str, now: datetime) -> None:
+    """Record a failed fetch on the health row and advance the circuit breaker.
+
+    Bumps the failure counters, stores the error text (cut to 500 characters)
+    and timestamps, then applies the state transitions: more consecutive
+    failures than the disable threshold switches any non-disabled circuit to
+    "disabled"; otherwise reaching the open threshold switches a closed
+    circuit to "open" and stamps circuit_opened_at.
+    """
+    cf = health.consecutive_failures + 1
+    health.consecutive_failures = cf
+    health.total_fetches += 1
+    health.total_failures += 1
+    health.last_error = error[:500]
+    health.last_error_at = now
+    health.updated_at = now
+
+    state = health.circuit_state
+    if cf > _CIRCUIT_DISABLE_AFTER and state != "disabled":
+        health.circuit_state = "disabled"
+        logger.error(
+            f"[health] source={health.source_id} 연속 실패 {cf}회 — circuit DISABLED "
+            f"(수집 제외, A-8 패널에서 수동 복구 필요)"
+        )
+        return
+    if cf >= _CIRCUIT_OPEN_AFTER and state == "closed":
+        health.circuit_state = "open"
+        health.circuit_opened_at = now
+        logger.warning(f"[health] source={health.source_id} 연속 실패 {cf}회 — circuit open")
+
+
+async def _get_or_create_health(session, source_id: int) -> SourceHealth:
+    """Return the SourceHealth row for source_id, creating and flushing it if absent."""
+    lookup = select(SourceHealth).where(SourceHealth.source_id == source_id)
+    existing = (await session.execute(lookup)).scalars().first()
+    if existing is not None:
+        return existing
+    created = SourceHealth(source_id=source_id)
+    session.add(created)
+    await session.flush()
+    return created
+
+
+# 수동 POST /api/news/collect 와 6h 스케줄 사이클의 동시 실행 차단 (단일 프로세스·단일
+# 이벤트루프). 동시 진입 시 _get_or_create_health 가 같은 source_id 를 양쪽에서 INSERT
+# → uq_source_health_source_id 위반 IntegrityError 로 사이클 전체가 죽는 경합의 원천 봉쇄.
+_run_lock = asyncio.Lock()
+
+
 async def run():
    """뉴스 수집 실행"""
+    async with _run_lock:
+        await _run_locked()
+
+
+async def _run_locked():
+    now = datetime.now(timezone.utc)
    async with async_session() as session:
        result = await session.execute(
            select(NewsSource).where(NewsSource.enabled == True)
@@ -87,17 +217,24 @@ async def run():

        total = 0
        for source in sources:
+            health = await _get_or_create_health(session, source.id)
+            if not _should_attempt(health, now):
+                logger.info(f"[{source.name}] circuit {health.circuit_state} — 이번 사이클 skip")
+                continue
            try:
                if source.feed_type == "api":
-                    count = await _fetch_api(session, source)
+                    count, status = await _fetch_api(session, source)
                else:
-                    count = await _fetch_rss(session, source)
+                    count, status = await _fetch_rss(session, source)

                source.last_fetched_at = datetime.now(timezone.utc)
+                _record_success(health, count, status == "not_modified", now)
                total += count
            except Exception as e:
-                logger.error(f"[{source.name}] 수집 실패: {e}")
+                # str 이 빈 예외(httpx.ConnectError('')) 대비 — health 기록과 동일 규칙
+                logger.error(f"[{source.name}] 수집 실패: {str(e) or repr(e)}")
                source.last_fetched_at = datetime.now(timezone.utc)
+                _record_failure(health, str(e) or repr(e), now)

        await session.commit()
        logger.info(f"뉴스 수집 완료: {total}건 신규")
@@ -108,8 +245,164 @@ ALLOWED_CONTENT_TYPES = ("application/rss+xml", "application/atom+xml",
                         "application/xml", "text/xml")


-async def _fetch_rss(session, source: NewsSource) -> int:
-    """RSS 피드 수집 — redirect 재검증 + 크기/content-type 제한"""
+# 연결 재시도 간격 — MOEL 추가 실측(2026-06-11): 드랍이 연결 단위 랜덤이라
+# 1.5s 후 재시도도 연속으로 걸리는 케이스 발생(직후 다른 연결은 즉시 성공) → 2회로 보강.
+_CONNECT_RETRY_DELAYS = (2.0, 5.0)
+
+
+async def _get_with_connect_retry(client, url: str):
+    """GET url, retrying only connection-layer (TCP/TLS) errors.
+
+    One retry is made per entry in _CONNECT_RETRY_DELAYS, sleeping that long
+    before it; when the delays are used up the connection error propagates.
+    HTTP status errors are not retried, so the caller's own branching on the
+    response is preserved.
+
+    Background (MOEL measurement, 2026-06-11): security appliances on
+    government sites intermittently drop the TLS handshake per connection
+    (curl rc=35, an immediate retry succeeds), so a once-per-cycle feed fetch
+    piled up ConnectError('') failures and opened the circuit. A persistent
+    outage is still left to the circuit breaker.
+    """
+    remaining_delays = iter(_CONNECT_RETRY_DELAYS)
+    while True:
+        try:
+            return await client.get(url)
+        except (httpx.ConnectError, httpx.ConnectTimeout) as e:
+            delay = next(remaining_delays, None)
+            if delay is None:
+                raise  # retries exhausted: surface the last connection error
+            # The query string is left out of the log line.
+            logger.info(f"연결 오류 {delay}s 후 재시도 ({url.split('?')[0]}): {repr(e)}")
+            await asyncio.sleep(delay)
+
+
+async def _is_portal_duplicate(session, title: str) -> bool:
+    """A-6 second-pass dedup: a portal reprint and its original stored under different URLs.
+
+    The secondary key is the exact title within the last 3 days (a different
+    source or URL is not caught by the primary key). Titles shorter than 12
+    characters are exempt to avoid false positives on generic headlines.
+    Returns True when a matching news article document already exists.
+    """
+    if len(title) < 12:
+        return False
+    cutoff = datetime.now(timezone.utc) - timedelta(days=3)
+    lookup = (
+        select(Document.id)
+        .where(
+            Document.title == title,
+            Document.source_channel == "news",
+            Document.file_format == "article",
+            Document.extracted_at >= cutoff,
+        )
+        .limit(1)
+    )
+    match = (await session.execute(lookup)).scalars().first()
+    return match is not None
+
+
+async def _enqueue_processing(session, doc: Document, source: NewsSource, pub_dt: datetime) -> None:
+    """Enqueue the follow-up pipeline stages for a freshly stored document.
+
+    - signal-only sources: search indexing only (embed + chunk), never
+      fulltext or summarize.
+    - fulltext_policy='page' sources with an edit_url: only the 'fulltext'
+      stage; summarize/embed/chunk are enqueued by the fulltext worker once
+      promotion (or demotion) is settled, which avoids the ordering trap
+      where an early RSS-summary summarize makes the summarize worker skip
+      the later full text as "already summarized".
+    - everything else: summarize, plus indexing.
+
+    Indexing applies to every crawl-channel document regardless of age and
+    to other documents published within the last 30 days.
+    """
+
+    async def _enqueue_index() -> None:
+        await enqueue_stage(session, doc.id, "embed")
+        await enqueue_stage(session, doc.id, "chunk")
+
+    def _should_index() -> bool:
+        # Domain corpus (crawl) is indexed in full; the 30-day gate is news-only.
+        if source.source_channel == "crawl":
+            return True
+        return (datetime.now(timezone.utc) - pub_dt).days <= 30
+
+    if source.fetch_method == "signal-only":
+        # B-4: a signal is indexed for search only. fulltext/summarize are never
+        # enqueued, so even a registry entry wrongly set to fulltext_policy='page'
+        # causes no page fetch (defence first). Skipping summarize also means no
+        # summary LLM load; per the original author, digests/briefings exclude
+        # documents without ai_summary (services/digest/loader.py), so signal
+        # documents drop out of them naturally.
+        if _should_index():
+            await _enqueue_index()
+        return
+
+    if source.fulltext_policy == "page" and doc.edit_url:
+        await enqueue_stage(session, doc.id, "fulltext")
+        return
+
+    await enqueue_stage(session, doc.id, "summarize")
+    if _should_index():
+        await _enqueue_index()
+
+
+def _entry_body(source: NewsSource, entry, summary: str) -> tuple[str, str]:
+    """Choose the stored body for a feed entry; returns (body, extractor_version).
+
+    Pure function (target of the shape tests).
+
+    signal-only: the feed summary is the body and is never truncated (keeps
+      1.3~1.6K-character arXiv abstracts whose tail a 1000-character cap
+      would lose); falls back to summary when the cleaned text is empty.
+      No page fetch ever happens for these sources (B-4).
+    feed-full: the feed content is trusted as full text only for sources
+      flagged this way, and only when it is longer than summary (truncation
+      and injected ads are common, so an ordinary source's
+      summary/content:encoded must not be stored as full text — A-6).
+    otherwise: summary, tagged "rss".
+    """
+    if source.fetch_method == "signal-only":
+        feed_text = entry.get("summary", "") or entry.get("description", "")
+        cleaned = _clean_html(feed_text, max_len=None)
+        return (cleaned if cleaned else summary), "rss-signal"
+
+    if source.fulltext_policy != "feed-full":
+        return summary, "rss"
+
+    contents = entry.get("content") or []
+    first_value = contents[0].get("value", "") if contents else ""
+    candidate = _clean_html(first_value or entry.get("summary", ""), max_len=None)
+    if len(candidate) > len(summary):
+        return candidate, "rss-feed-full"
+    return summary, "rss"
+
+
+def _build_extract_meta(source: NewsSource, pub_dt: datetime) -> dict:
+    """Build the provenance metadata used by fulltext_worker and the panel.
+
+    Documents carry no source foreign key, so the source id/name and the
+    ISO-formatted publication time are recorded here. When the source has a
+    license scheme, a "license" entry is added as well.
+    """
+    meta = dict(
+        source_id=source.id,
+        source_name=source.name,
+        published_at=pub_dt.isoformat(),
+    )
+    if not source.license_scheme:
+        return meta
+    # Safety library A-2: inject the registry's license deterministically (0-3
+    # license metadata). It is the gate input that lets P3 digest/publishing
+    # flows structurally exclude redistribute=false sources.
+    meta["license"] = dict(
+        scheme=source.license_scheme,
+        redistribute=bool(source.license_redistribute),
+        attribution=source.name,
+    )
+    return meta
+
+
+def _material_axis(source: NewsSource) -> tuple[str | None, str | None]:
+    """Return the safety-library axes (material_type, jurisdiction) for a source.
+
+    - material_type comes from the registry; a falsy value means the source
+      is out of scope (news, philosophy, ...) and yields (None, None).
+    - jurisdiction propagates source.country, except that "paper" always gets
+      None (jurisdiction does not fit international journals — plan 0-1
+      contract; a registry country such as US is deliberately not propagated).
+    """
+    kind = source.material_type
+    if not kind:
+        return None, None
+    jurisdiction = None if kind == "paper" else source.country
+    return kind, jurisdiction
+
+
+def _doc_identity(source: NewsSource, source_short: str, category: str) -> dict:
+    """Return the channel-specific identity fields for a new document.
+
+    The news channel keeps its existing values (no regression); the crawl
+    channel gets a domain identity. path_prefix is the channel directory of
+    file_path. ai_domain is the axis digest/search filters branch on, so
+    crawl documents are kept out of 'News' (0-5 channel-level separation).
+    For crawl, an empty or "Other" category falls back to the domain "Domain".
+    """
+    material_type, jurisdiction = _material_axis(source)
+    if source.source_channel == "crawl":
+        domain = "Domain" if (not category or category == "Other") else category
+        prefix, ai_domain, tag = "crawl", domain, f"{domain}/{source_short}"
+    else:
+        prefix, ai_domain, tag = "news", "News", f"News/{source_short}/{category}"
+    return {
+        "path_prefix": prefix,
+        "ai_domain": ai_domain,
+        "ai_tags": [tag],
+        "material_type": material_type,
+        "jurisdiction": jurisdiction,
+    }
+
+
+async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]:
+    """RSS 피드 수집 — redirect 재검증 + 크기/content-type 제한 + 조건부 GET (A-1).
+
+    반환 (신규 건수, 상태). 상태 'not_modified' = 304 또는 콘텐츠 해시 동일.
+    소스 단위 실패는 FeedError raise — run() 이 health 실패로 기록.
+    """
    from urllib.parse import urljoin
    from core.url_validator import validate_feed_url, HTTP_EXCEPTION_DOMAINS

@@ -120,51 +413,79 @@ async def _fetch_rss(session, source: NewsSource) -> int:

    # 순수 HTTP 소스인데 allowlist에 없으면 차단
    if source.feed_url.startswith("http://") and not http_allowed:
-        logger.error(f"[{source.name}] HTTP 차단 (allowlist 미등록): {source_hostname}")
-        return 0
+        raise FeedError(f"HTTP 차단 (allowlist 미등록): {source_hostname}")

    # fetch 전 URL 재검증 (등록 이후 DNS 변경 대비)
    try:
        validate_feed_url(source.feed_url, allow_http=http_allowed)
    except ValueError as e:
-        logger.error(f"[{source.name}] URL 검증 실패: {e}")
-        return 0
+        raise FeedError(f"URL 검증 실패: {e}") from e

-    async with httpx.AsyncClient(timeout=10, follow_redirects=False) as client:
-        resp = await client.get(source.feed_url)
+    # A-1: 정직 UA + 조건부 GET — 서버가 준 워터마크를 받은 그대로 재전송
+    headers = {"User-Agent": CRAWL_UA}
+    if source.etag:
+        headers["If-None-Match"] = source.etag
+    if source.last_modified:
+        headers["If-Modified-Since"] = source.last_modified

-        # redirect 수동 처리 (최대 3회, 각 target 재검증)
+    async with httpx.AsyncClient(
+        timeout=10, follow_redirects=False, headers=headers
+    ) as client:
+        resp = await _get_with_connect_retry(client, source.feed_url)
+
+        # 304 는 redirect 처리보다 먼저 — httpx 의 is_redirect 는 3xx 전체(304 포함)에
+        # True 라, 304 를 redirect 로 오인하면 location 없는 같은 URL 을 재요청해
+        # "redirect 3회 초과" 로 오류 처리됨(조건부 GET 안정 피드 전멸 버그).
+        if resp.status_code == 304:
+            logger.info(f"[{source.name}] 304 Not Modified — 본문 미전송")
+            return 0, "not_modified"
+
+        # redirect 수동 처리 (최대 3회, 각 target 재검증) — location 있는 진짜 redirect 만.
        # allowlist 도메인이면 redirect target의 HTTP도 허용
        redirects = 0
-        while resp.is_redirect and redirects < 3:
-            location = resp.headers.get("location", "")
-            location = urljoin(str(resp.request.url), location)
+        while resp.has_redirect_location and redirects < 3:
+            location = urljoin(str(resp.request.url), resp.headers["location"])
            try:
                validate_feed_url(location, allow_http=http_allowed)
            except ValueError as e:
-                logger.error(f"[{source.name}] redirect target 차단: {e}")
-                return 0
+                raise FeedError(f"redirect target 차단: {e}") from e
            resp = await client.get(location)
+            if resp.status_code == 304:
+                logger.info(f"[{source.name}] 304 Not Modified (redirect 후) — 본문 미전송")
+                return 0, "not_modified"
            redirects += 1
-        if resp.is_redirect:
-            logger.error(f"[{source.name}] redirect 3회 초과")
-            return 0
+        if resp.has_redirect_location:
+            raise FeedError("redirect 3회 초과")

        resp.raise_for_status()

        if len(resp.content) > MAX_RESPONSE_SIZE:
-            logger.warning(f"[{source.name}] 응답 크기 초과: {len(resp.content)} bytes")
-            return 0
+            raise FeedError(f"응답 크기 초과: {len(resp.content)} bytes")

        ct = resp.headers.get("content-type", "").lower()
        if not any(t in ct for t in ALLOWED_CONTENT_TYPES):
-            logger.warning(f"[{source.name}] 비정상 content-type: {ct}")
-            return 0
+            raise FeedError(f"비정상 content-type: {ct}")
+
+    # A-1: 콘텐츠 해시 변경감지 (CDN 의 ETag 회전 대비 병행) — 저장된 해시는 항상
+    # 파싱 검증을 통과한 응답의 것이므로 동일성 비교는 파싱 전에 안전
+    new_etag = resp.headers.get("etag")
+    new_last_modified = resp.headers.get("last-modified")
+    content_hash = hashlib.sha256(resp.content).hexdigest()
+    if source.feed_content_hash == content_hash:
+        logger.info(f"[{source.name}] 콘텐츠 해시 동일 — 파싱 skip")
+        return 0, "not_modified"

    feed = feedparser.parse(resp.text)
    if feed.bozo and not feed.entries:
-        logger.warning(f"[{source.name}] RSS 파싱 실패: {feed.bozo_exception}")
-        return 0
+        raise FeedError(f"RSS 파싱 실패: {feed.bozo_exception}")
+
+    # A-1: 워터마크 영속은 파싱 검증 통과 후에만 — 부패(bozo) 응답의 ETag 를 저장하면
+    # 이후 304 로 영구 skip 되는 silent corruption 차단
+    if new_etag:
+        source.etag = new_etag
+    if new_last_modified:
+        source.last_modified = new_last_modified
+    source.feed_content_hash = content_hash
    count = 0

    for entry in feed.entries:
@@ -176,67 +497,225 @@ async def _fetch_rss(session, source: NewsSource) -> int:
        if not summary:
            summary = title

+        # 정책별 본문 선택 — signal-only(무절단 요약) / feed-full(피드 전문) / 기본(요약)
+        body, extractor_version = _entry_body(source, entry, summary)
+
        link = entry.get("link", "")
+
+        # B-5 quirk: 비디오 항목 필터 (Aeon/Psyche — 텍스트 코퍼스에 비디오 페이지 무가치)
+        if source.parser_quirk == "skip-video" and re.search(r"/videos?/", link):
+            continue
+
        published = entry.get("published_parsed") or entry.get("updated_parsed")
        pub_dt = datetime(*published[:6], tzinfo=timezone.utc) if published else datetime.now(timezone.utc)

-        # 중복 체크
+        # 중복 체크 — 레거시 행은 raw URL 로 저장돼 있어 normalized/raw 양쪽 매칭.
+        # 교차 게시(같은 기사가 두 피드에 존재)로 2행 이상 매칭될 수 있어 first() 사용
+        # (scalar_one_or_none 은 MultipleResultsFound raise — 2026-06 BBC 수집 중단 원인).
        article_id = _article_hash(title, pub_dt.strftime("%Y%m%d"), source.name)
        normalized_url = _normalize_url(link)

        existing = await session.execute(
            select(Document).where(
                (Document.file_hash == article_id) |
-                (Document.edit_url == normalized_url)
-            )
+                (Document.edit_url.in_([normalized_url, link]))
+            ).limit(1)
        )
-        if existing.scalar_one_or_none():
+        if existing.scalars().first():
+            continue
+
+        # A-6 2차: 포털 전재 dedup (first-wins — 먼저 적재된 쪽이 정본)
+        if await _is_portal_duplicate(session, title):
+            logger.info(f"[{source.name}] portal-dup skip: {title[:60]}")
            continue

        category = _normalize_category(source.category or "")
        source_short = source.name.split(" ")[0]  # "경향신문 문화" → "경향신문"
+        ident = _doc_identity(source, source_short, category)

        doc = Document(
-            file_path=f"news/{source.name}/{article_id}",
+            file_path=f"{ident['path_prefix']}/{source.name}/{article_id}",
            file_hash=article_id,
            file_format="article",
-            file_size=len(summary.encode()),
+            file_size=len(body.encode()),
            file_type="note",
            title=title,
-            extracted_text=f"{title}\n\n{summary}",
+            extracted_text=f"{title}\n\n{body}",
            extracted_at=datetime.now(timezone.utc),
-            extractor_version="rss",
-            source_channel="news",
+            extractor_version=extractor_version,
+            # article = 텍스트 네이티브(본문=extracted_text). markdown 단계 미enqueue 라
+            # 기본값 'pending' 이면 영구 비수렴 → backlog 지표 오염 + md_status_pending partial
+            # 인덱스 비대. 생성 시점에 terminal 'skipped' 로 명시(변환 비대상).
+            # fulltext_policy='page' 소스는 fulltext_worker 가 승격 시 success 로 갱신.
+            md_status="skipped",
+            md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상",
+            source_channel=source.source_channel,
            data_origin="external",
-            edit_url=link,
+            # 조회와 동일하게 정규화해 저장 — raw(tracking param 포함) 저장 시 URL dedup 무력화
+            edit_url=normalized_url,
            review_status="approved",
-            ai_domain="News",
+            ai_domain=ident["ai_domain"],
            ai_sub_group=source_short,
-            ai_tags=[f"News/{source_short}/{category}"],
+            ai_tags=ident["ai_tags"],
+            # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수)
+            material_type=ident["material_type"],
+            jurisdiction=ident["jurisdiction"],
+            published_date=pub_dt.date() if pub_dt else None,
+            extract_meta=_build_extract_meta(source, pub_dt),
        )
        session.add(doc)
        await session.flush()

-        # summarize + embed + chunk 등록 (classify 불필요)
-        await enqueue_stage(session, doc.id, "summarize")
-        days_old = (datetime.now(timezone.utc) - pub_dt).days
-        if days_old <= 30:
-            await enqueue_stage(session, doc.id, "embed")
-            await enqueue_stage(session, doc.id, "chunk")
+        # summarize + embed + chunk 등록 (classify 불필요).
+        # page 정책 소스는 fulltext 만 — 후속은 fulltext_worker 가 확정 후 enqueue.
+        await _enqueue_processing(session, doc, source, pub_dt)

        count += 1

    logger.info(f"[{source.name}] RSS → {count}건 수집")
-    return count
+    return count, "ok"


-async def _fetch_api(session, source: NewsSource) -> int:
+async def _fetch_api(session, source: NewsSource) -> tuple[int, str]:
+    """Dispatch an API source to its provider fetcher, chosen by the feed_url host (B-2).
+
+    A host matches only as the exact domain or a dot-delimited subdomain (no bare suffix match).
+    Legacy NYT rows (api.nytimes.com) keep their path; unknown hosts raise FeedError (no fallback).
+    """
+    host = (urlparse(source.feed_url).hostname or "").lower()
+    if host == "guardianapis.com" or host.endswith(".guardianapis.com"):
+        return await _fetch_api_guardian(session, source)
+    if host == "nytimes.com" or host.endswith(".nytimes.com"):
+        return await _fetch_api_nyt(session, source)
+    raise FeedError(f"API 제공자 미등록 호스트: {host} — 디스패치 분기 추가 필요")
+
+
+def _guardian_request(feed_url: str, api_key: str) -> tuple[str, dict]:
+    """Single source of truth for the Guardian request shape; returns (endpoint without query, params).
+    (Target of a fixture regression test: tests/fixtures/guardian_open_platform_search_response.json.)"""
+    parsed = urlparse(feed_url)
+    params = {
+        **dict(parse_qsl(parsed.query)),  # feed_url query first; the fixed keys below override duplicates
+        "show-fields": "bodyText,trailText",
+        "page-size": "20",
+        "order-by": "newest",
+        "api-key": api_key,
+    }
+    return f"{parsed.scheme}://{parsed.netloc}{parsed.path}", params
+
+
+async def _fetch_api_guardian(session, source: NewsSource) -> tuple[int, str]:
+    """Collect articles from the Guardian Open Platform (B-2), requesting bodyText via show-fields.
+
+    feed_url is registered with the section query embedded (e.g. https://content.guardianapis.com/search?section=world).
+    Returns (count of newly added documents, "ok"). Raises FeedError when GUARDIAN_API_KEY is unset,
+    on HTTP or connection failure, or when the response status is not "ok" (no silent fallback).
+    """
+    import os
+    api_key = os.getenv("GUARDIAN_API_KEY", "")
+    if not api_key:
+        raise FeedError("GUARDIAN_API_KEY 미설정 — Guardian 수집 불가")
+
+    endpoint, params = _guardian_request(source.feed_url, api_key)
+
+    try:
+        async with httpx.AsyncClient(timeout=15) as client:
+            resp = await client.get(endpoint, params=params)
+            resp.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        # Drop the query string (it contains api-key) — only the path goes into the error message
+        safe_url = str(e.request.url).split("?")[0]
+        raise FeedError(f"Guardian API 실패: {e.response.status_code} @ {safe_url}") from e
+    except httpx.RequestError as e:
+        safe_url = str(e.request.url).split("?")[0] if e.request else "unknown"
+        raise FeedError(f"Guardian API 연결 실패: {safe_url}") from e
+
+    payload = resp.json().get("response", {})
+    if payload.get("status") != "ok":
+        raise FeedError(f"Guardian API status={payload.get('status')}")
+
+    count = 0
+    for item in payload.get("results", []):
+        title = (item.get("webTitle") or "").strip()
+        if not title:
+            continue
+
+        fields = item.get("fields") or {}
+        body_text = (fields.get("bodyText") or "").strip()
+        trail = _clean_html(fields.get("trailText") or "")
+        # bodyText is used as-is (no HTML cleanup); under 200 chars, fall back to trail text, then title.
+        is_full = len(body_text) >= 200
+        body = body_text if is_full else (trail or title)
+
+        link = item.get("webUrl", "")
+        pub_str = item.get("webPublicationDate", "")
+        try:
+            pub_dt = datetime.fromisoformat(pub_str.replace("Z", "+00:00"))
+        except (ValueError, AttributeError):
+            pub_dt = datetime.now(timezone.utc)
+
+        article_id = _article_hash(title, pub_dt.strftime("%Y%m%d"), source.name)
+        normalized_url = _normalize_url(link)
+
+        # Same dedup as the RSS collector: match normalized or raw URL; first() tolerates several matches
+        existing = await session.execute(
+            select(Document).where(
+                (Document.file_hash == article_id) |
+                (Document.edit_url.in_([normalized_url, link]))
+            ).limit(1)
+        )
+        if existing.scalars().first():
+            continue
+
+        if await _is_portal_duplicate(session, title):
+            logger.info(f"[{source.name}] portal-dup skip: {title[:60]}")
+            continue
+
+        category = _normalize_category(item.get("sectionName", source.category or ""))
+        source_short = source.name.split(" ")[0]
+        ident = _doc_identity(source, source_short, category)
+
+        doc = Document(
+            file_path=f"{ident['path_prefix']}/{source.name}/{article_id}",
+            file_hash=article_id,
+            file_format="article",
+            file_size=len(body.encode()),
+            file_type="note",
+            title=title,
+            extracted_text=f"{title}\n\n{body}",
+            extracted_at=datetime.now(timezone.utc),
+            extractor_version="guardian_api_full" if is_full else "guardian_api",
+            md_status="skipped",
+            md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상",
+            source_channel=source.source_channel,
+            data_origin="external",
+            edit_url=normalized_url,
+            review_status="approved",
+            ai_domain=ident["ai_domain"],
+            ai_sub_group=source_short,
+            ai_tags=ident["ai_tags"],
+            # Safety library A-2 — deterministic registry values (original note: needed at ingest, path skips classify)
+            material_type=ident["material_type"],
+            jurisdiction=ident["jurisdiction"],
+            published_date=pub_dt.date() if pub_dt else None,
+            extract_meta=_build_extract_meta(source, pub_dt),
+        )
+        session.add(doc)
+        await session.flush()
+
+        await _enqueue_processing(session, doc, source, pub_dt)
+        count += 1
+
+    logger.info(f"[{source.name}] API → {count}건 수집")
+    return count, "ok"
+
+
+async def _fetch_api_nyt(session, source: NewsSource) -> tuple[int, str]:
    """NYT API 수집 — 키 마스킹 + health degradation"""
    import os
    nyt_key = os.getenv("NYT_API_KEY", "")
    if not nyt_key:
-        logger.error("NYT_API_KEY 미설정 — US 뉴스 수집 불가")
-        return 0
+        raise FeedError("NYT_API_KEY 미설정 — US 뉴스 수집 불가")

    try:
        async with httpx.AsyncClient(timeout=10) as client:
@@ -248,12 +727,10 @@ async def _fetch_api(session, source: NewsSource) -> int:
    except httpx.HTTPStatusError as e:
        # 쿼리스트링(api-key 포함) 제거 — path까지만 로깅
        safe_url = str(e.request.url).split("?")[0]
-        logger.error(f"NYT API 실패: {e.response.status_code} @ {safe_url}")
-        return 0
+        raise FeedError(f"NYT API 실패: {e.response.status_code} @ {safe_url}") from e
    except httpx.RequestError as e:
        safe_url = str(e.request.url).split("?")[0] if e.request else "unknown"
-        logger.error(f"NYT API 연결 실패: {safe_url}")
-        return 0
+        raise FeedError(f"NYT API 연결 실패: {safe_url}") from e

    data = resp.json()
    count = 0
@@ -277,20 +754,26 @@ async def _fetch_api(session, source: NewsSource) -> int:
        article_id = _article_hash(title, pub_dt.strftime("%Y%m%d"), source.name)
        normalized_url = _normalize_url(link)

+        # RSS 수집부와 동일: 레거시 raw URL + 교차 게시 다중 매칭 내성 (first)
        existing = await session.execute(
            select(Document).where(
                (Document.file_hash == article_id) |
-                (Document.edit_url == normalized_url)
-            )
+                (Document.edit_url.in_([normalized_url, link]))
+            ).limit(1)
        )
-        if existing.scalar_one_or_none():
+        if existing.scalars().first():
+            continue
+
+        if await _is_portal_duplicate(session, title):
+            logger.info(f"[{source.name}] portal-dup skip: {title[:60]}")
            continue

        category = _normalize_category(article.get("section", source.category or ""))
        source_short = source.name.split(" ")[0]

+        ident = _doc_identity(source, source_short, category)
        doc = Document(
-            file_path=f"news/{source.name}/{article_id}",
+            file_path=f"{ident['path_prefix']}/{source.name}/{article_id}",
            file_hash=article_id,
            file_format="article",
            file_size=len(summary.encode()),
@@ -299,24 +782,30 @@ async def _fetch_api(session, source: NewsSource) -> int:
            extracted_text=f"{title}\n\n{summary}",
            extracted_at=datetime.now(timezone.utc),
            extractor_version="nyt_api",
-            source_channel="news",
+            # article = 텍스트 네이티브(본문=extracted_text). markdown 단계 미enqueue 라
+            # 기본값 'pending' 이면 영구 비수렴 → backlog 지표 오염 + md_status_pending partial
+            # 인덱스 비대. 생성 시점에 terminal 'skipped' 로 명시(변환 비대상).
+            md_status="skipped",
+            md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상",
+            source_channel=source.source_channel,
            data_origin="external",
-            edit_url=link,
+            edit_url=normalized_url,
            review_status="approved",
-            ai_domain="News",
+            ai_domain=ident["ai_domain"],
            ai_sub_group=source_short,
-            ai_tags=[f"News/{source_short}/{category}"],
+            ai_tags=ident["ai_tags"],
+            # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수)
+            material_type=ident["material_type"],
+            jurisdiction=ident["jurisdiction"],
+            published_date=pub_dt.date() if pub_dt else None,
+            extract_meta=_build_extract_meta(source, pub_dt),
        )
        session.add(doc)
        await session.flush()

-        await enqueue_stage(session, doc.id, "summarize")
-        days_old = (datetime.now(timezone.utc) - pub_dt).days
-        if days_old <= 30:
-            await enqueue_stage(session, doc.id, "embed")
-            await enqueue_stage(session, doc.id, "chunk")
+        await _enqueue_processing(session, doc, source, pub_dt)

        count += 1

    logger.info(f"[{source.name}] API → {count}건 수집")
-    return count
+    return count, "ok"
@@ -5,9 +5,11 @@

 전략 (하이브리드):
  - OOXML(.docx/.xlsx/.pptx) → markitdown        ← 신규 의존성(pip install markitdown). lazy import.
-  - .hwp/.hwpx               → LibreOffice(headless) → HTML → markdownify  ← markdownify 기존 의존성.
-    (LibreOffice 가 hwp import 필터 보유. .hwpx 는 .hwp 와 다른 필터·버전 의존 → E-1: prod LibreOffice
-     버전핀 안전컨텍스트에서 PoC 실행. 표 fidelity 가 진짜 리스크 — 하니스가 측정.)
+  - .hwp(HWP5 binary)        → pyhwp hwp5html → HTML → markdownify  ← pyhwp+six 의존성.
+    (2026-06-09: LibreOffice 번들 libhwplo 필터가 실제 한컴 HWP5 파일을 못 읽어 rc=0 + 'source file
+     could not be loaded' 로 전건 실패 → 순수 Python HWP5 전용 변환기 pyhwp 로 교체.)
+  - .hwpx                    → LibreOffice(headless) → HTML → markdownify  ← markdownify 기존 의존성.
+    (HWPX(zip)는 pyhwp 미지원 → LibreOffice 폴백 유지. 현재 코퍼스는 전부 HWP5 binary.)

 실패 계약 (C-5 postcondition 의 backend 절반):
  변환 실패·빈 출력·타임아웃·의존성 부재 → OfficeMdError 를 raise 한다.
@@ -18,6 +20,7 @@
 from __future__ import annotations

 import os
+import re
 import shutil
 import subprocess
 import tempfile
@@ -34,6 +37,13 @@ _MIN_BODY_CHARS = 16
 # 이름) → 기본값 정합. soffice 만 있는 환경은 LIBREOFFICE_BIN 으로 override.
 _SOFFICE_BIN = os.environ.get("LIBREOFFICE_BIN", "libreoffice")

+# pyhwp 콘솔 스크립트(pip install pyhwp 시 PATH 등록). HWP5 binary(.hwp) 전용.
+_HWP5HTML_BIN = os.environ.get("HWP5HTML_BIN", "hwp5html")
+
+# hwp5html 이 bindata/ 로 추출하는 첨부물 중 NAS 영속 대상 raster 확장자.
+# (OLE 수식/도형은 index.xhtml 에 앵커가 없어 위치 복원 불가 → 영속 제외.)
+_RASTER_EXTS = {"jpg", "jpeg", "png", "gif", "bmp"}
+

 class OfficeMdError(Exception):
    """office/hwp → md 변환 실패 신호. 호출부는 md_status='failed' 로 라우팅."""
@@ -50,7 +60,9 @@ def convert_office_to_md(path: str | Path, *, timeout: int = 90) -> str:

    if suffix in OOXML_FORMATS:
        md = _via_markitdown(p)
-    else:  # .hwp / .hwpx
+    elif suffix == ".hwp":
+        md = _via_pyhwp_html(p, timeout=timeout)
+    else:  # .hwpx (pyhwp 미지원 → LibreOffice 폴백)
        md = _via_libreoffice_html(p, timeout=timeout)

    md = (md or "").strip()
@@ -74,8 +86,93 @@ def _via_markitdown(path: Path) -> str:
    return getattr(result, "text_content", "") or ""


+def _run_hwp5html(path: Path, *, timeout: int) -> tuple[str, list[dict]]:
+    """Convert an HWP5 binary (.hwp) to (markdown, raster_images) with a single hwp5html run.
+
+    Background (original note): LibreOffice's bundled libhwplo filter failed to load real Hancom
+    HWP5 files (rc=0 + 'source file could not be loaded'), so the pyhwp CLI hwp5html is used.
+    Raises OfficeMdError: markdownify/binary missing, timeout, non-zero exit, or no index.xhtml.
+
+    raster_images = [{'data': bytes, 'format': 'jpeg'|'png'|...}] — raster files from bindata/ only.
+    Per the original note, hwp5html does not anchor images in the body xhtml, so inline positions
+    cannot be restored; the caller is expected to attach them separately (e.g. a trailing gallery).
+    """
+    try:
+        from markdownify import markdownify  # existing dependency
+    except ImportError as e:  # noqa: BLE001
+        raise OfficeMdError("markdownify 미설치(기존 의존성이어야 함)") from e
+
+    with tempfile.TemporaryDirectory(prefix="office_md_hwp_") as tmp:
+        outdir = Path(tmp)
+        # hwp5html --output <dir> <file.hwp> → <dir>/index.xhtml + styles.css + bindata/
+        cmd = [_HWP5HTML_BIN, "--output", str(outdir), str(path)]
+        try:
+            proc = subprocess.run(
+                cmd, capture_output=True, text=True, timeout=timeout, check=False
+            )
+        except FileNotFoundError as e:
+            raise OfficeMdError(
+                f"pyhwp(hwp5html) 바이너리 부재({_HWP5HTML_BIN}) — `pip install pyhwp six` 필요"
+            ) from e
+        except subprocess.TimeoutExpired as e:
+            raise OfficeMdError(f"pyhwp 변환 타임아웃({timeout}s): {path.name}") from e
+
+        index_path = outdir / "index.xhtml"
+        if proc.returncode != 0 or not index_path.exists():
+            raise OfficeMdError(
+                f"pyhwp html 변환 실패: {path.name} (rc={proc.returncode}): "
+                f"{(proc.stderr or proc.stdout or '').strip()[:300]}"
+            )
+        html = index_path.read_text(encoding="utf-8", errors="replace")
+        # The xhtml from hwp5html starts with an <?xml ...?> declaration. Per the original note,
+        # markdownify's parser would leak it into the body as text, which (1) pollutes the top of
+        # the markdown and (2) for an empty body shell lets ~34 stray chars pass the
+        # _MIN_BODY_CHARS empty-output gate (false success) — so strip it before markdownify.
+        html = re.sub(r"^\s*<\?xml[^>]*\?>\s*", "", html)
+        # markdownify renders tables as GFM to preserve them — ATX headings (original note: same as libreoffice path).
+        md = markdownify(html, heading_style="ATX", strip=["span", "font"])
+
+        images: list[dict] = []
+        bindata = outdir / "bindata"
+        if bindata.is_dir():
+            for f in sorted(bindata.iterdir()):
+                ext = f.suffix.lower().lstrip(".")
+                if ext in _RASTER_EXTS:
+                    images.append({
+                        "data": f.read_bytes(),
+                        "format": "jpeg" if ext == "jpg" else ext,
+                    })
+        return md, images
+
+
+def _via_pyhwp_html(path: Path, *, timeout: int) -> str:
+    """Convert an HWP5 binary (.hwp) to markdown, discarding the extracted images (text-only path)."""
+    md, _images = _run_hwp5html(path, timeout=timeout)
+    return md
+
+
+def convert_hwp_to_md_and_images(
+    path: str | Path, *, timeout: int = 90
+) -> tuple[str, list[dict]]:
+    """Convert an HWP5 file (.hwp) to (markdown, raster_images); original note: for marker_worker image persistence.
+
+    Raises OfficeMdError for a non-.hwp suffix, a missing file, or markdown under _MIN_BODY_CHARS.
+    Each raster_images element is {'data': bytes, 'format': str}; the list may be empty.
+    """
+    p = Path(path)
+    if p.suffix.lower() != ".hwp":
+        raise OfficeMdError(f"convert_hwp_to_md_and_images: .hwp 전용, got {p.suffix!r}")
+    if not p.exists():
+        raise OfficeMdError(f"file not found: {p}")
+    md, images = _run_hwp5html(p, timeout=timeout)
+    md = (md or "").strip()
+    if len(md) < _MIN_BODY_CHARS:
+        raise OfficeMdError(f"empty/too-short conversion ({len(md)} chars) for {p.name}")
+    return md, images
+
+
 def _via_libreoffice_html(path: Path, *, timeout: int) -> str:
-    """LibreOffice headless 로 HTML 변환 후 markdownify. hwp/hwpx 용."""
+    """LibreOffice headless 로 HTML 변환 후 markdownify. hwpx 용(.hwp 는 pyhwp)."""
    try:
        from markdownify import markdownify  # 기존 의존성
    except ImportError as e:  # noqa: BLE001
@@ -0,0 +1,142 @@
+"""Phase 2A 후보 임베딩 백필 CLI (embedding-phase2a-1 E-1).
+
+    docker compose exec -T fastapi python -m workers.phase2a_cand_backfill \
+        --target qwen06 --doc-id-max 41944 --chunk-id-max 104140 [--batch 32]
+
+설계 원칙 (plan r3):
+  - resumable/idempotent: 대상 = NOT EXISTS(후보 테이블) — 중단/재실행 시 이어서.
+    배치 단위 커밋. C-1 백필 게이트 = "후보 카운트 == 동결셋 카운트".
+  - 동결셋: id <= *_id_max AND 베이스라인 embedding IS NOT NULL (AND docs.deleted_at IS NULL).
+    cand 테이블은 동결 범위로만 INSERT (retrieval cand path 가 snapshot filter 를 안 타는 전제).
+  - 문서/청크 입력 = production 경로와 동일 구성(embed_worker._build_embed_input /
+    chunk_worker 의 [제목][섹션][본문]) + plain (instruct prefix 는 쿼리 측 전용 — G-1 불변식).
+  - 임베딩 = Ollama /api/embed 배치 호출 (G-1 fixture: 정규화 출력).
+  - qwen4m 은 본 CLI 대상이 아님 — qwen4 적재 후 SQL 파생(subvector+l2_normalize), plan E-1.
+"""
+
+import argparse
+import asyncio
+import hashlib
+import time
+
+import httpx
+from sqlalchemy import text
+
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from workers.embed_worker import _build_embed_input
+
+logger = setup_logger("phase2a_cand_backfill")
+
+OLLAMA_EMBED = "http://ollama:11434/api/embed"
+
+TARGETS = {
+    "qwen06": {
+        "model": "qwen3-embedding:0.6b", "dim": 1024,
+        "docs": "documents_cand_qwen06", "chunks": "document_chunks_cand_qwen06",
+    },
+    "qwen4": {
+        "model": "qwen3-embedding:4b", "dim": 2560,
+        "docs": "documents_cand_qwen4", "chunks": "document_chunks_cand_qwen4",
+    },
+}
+
+
+async def _embed_batch(client: httpx.AsyncClient, model: str, texts: list[str]) -> list[list[float]]:
+    r = await client.post(OLLAMA_EMBED, json={"model": model, "input": texts}, timeout=600)  # one request per batch
+    r.raise_for_status()  # error status -> httpx.HTTPStatusError propagates to the caller
+    embs = r.json()["embeddings"]
+    if len(embs) != len(texts):  # guard: callers zip() embeddings with their inputs positionally
+        raise RuntimeError(f"embed count mismatch: {len(embs)} != {len(texts)}")
+    return embs
+
+
+async def backfill_docs(target: dict, doc_id_max: int, batch: int, http: httpx.AsyncClient) -> int:
+    total = 0  # number of document ids selected and processed in this run (the return value)
+    while True:  # one batch per iteration, each with its own session and commit
+        async with async_session() as session:
+            rows = (await session.execute(text(f"""
+                SELECT d.id FROM documents d
+                WHERE d.id <= :m AND d.embedding IS NOT NULL AND d.deleted_at IS NULL
+                  AND NOT EXISTS (SELECT 1 FROM {target['docs']} c WHERE c.doc_id = d.id)
+                ORDER BY d.id LIMIT :b
+            """), {"m": doc_id_max, "b": batch})).scalars().all()
+            if not rows:  # no remaining id without a candidate row -> done (re-runs resume from here)
+                break
+            docs = [(await session.get(Document, i)) for i in rows]
+            inputs = [_build_embed_input(d) for d in docs]
+            embs = await _embed_batch(http, target["model"], inputs)
+            for d, inp, e in zip(docs, inputs, embs):
+                await session.execute(text(f"""
+                    INSERT INTO {target['docs']} (doc_id, embed_input_hash, embedding)
+                    VALUES (:i, :h, cast(:e AS vector))
+                    ON CONFLICT (doc_id) DO NOTHING
+                """), {"i": d.id, "h": hashlib.sha256(inp.encode()).hexdigest()[:16], "e": str(e)})
+            await session.commit()
+            total += len(rows)
+            if total % (batch * 10) < batch:  # progress log roughly once every 10 batches
+                logger.info(f"[{target['docs']}] +{total} (last id={rows[-1]})")
+    return total
+
+
+async def backfill_chunks(target: dict, chunk_id_max: int, batch: int, http: httpx.AsyncClient) -> int:
+    total = 0  # number of chunk rows selected and processed in this run (the return value)
+    while True:  # one batch per iteration, each with its own session and commit
+        async with async_session() as session:
+            rows = (await session.execute(text(f"""
+                SELECT c.id, c.doc_id, c.chunk_index, c.section_title, c.text, d.title
+                FROM corpus_chunks c JOIN documents d ON d.id = c.doc_id
+                WHERE c.id <= :m AND c.embedding IS NOT NULL AND d.deleted_at IS NULL
+                  AND NOT EXISTS (SELECT 1 FROM {target['chunks']} k WHERE k.id = c.id)
+                ORDER BY c.id LIMIT :b
+            """), {"m": chunk_id_max, "b": batch})).all()
+            if not rows:  # no remaining chunk without a candidate row -> done (re-runs resume here)
+                break
+            inputs = [
+                f"[제목] {r.title or ''}\n[섹션] {r.section_title or ''}\n[본문] {r.text}"
+                for r in rows
+            ]
+            embs = await _embed_batch(http, target["model"], inputs)
+            for r, e in zip(rows, embs):
+                await session.execute(text(f"""
+                    INSERT INTO {target['chunks']} (id, doc_id, chunk_index, section_title, text, embedding)
+                    VALUES (:i, :d, :x, :s, :t, cast(:e AS vector))
+                    ON CONFLICT (id) DO NOTHING
+                """), {"i": r.id, "d": r.doc_id, "x": r.chunk_index,
+                       "s": r.section_title, "t": r.text, "e": str(e)})
+            await session.commit()
+            total += len(rows)
+            if total % (batch * 10) < batch:  # progress log roughly once every 10 batches
+                logger.info(f"[{target['chunks']}] +{total} (last id={rows[-1].id})")  # id only, not the whole Row
+    return total
+
+
+async def run(target_key: str, doc_id_max: int, chunk_id_max: int, batch: int) -> None:
+    target = TARGETS[target_key]  # KeyError for an unknown key; main() restricts choices to TARGETS
+    start = time.monotonic()
+    async with httpx.AsyncClient() as http:  # one HTTP client shared by both backfills
+        nd = await backfill_docs(target, doc_id_max, batch, http)
+        nc = await backfill_chunks(target, chunk_id_max, batch, http)
+    mins = (time.monotonic() - start) / 60  # elapsed minutes for the two backfills
+    async with async_session() as session:
+        cd = (await session.execute(text(f"SELECT count(*) FROM {target['docs']}"))).scalar_one()
+        cc = (await session.execute(text(f"SELECT count(*) FROM {target['chunks']}"))).scalar_one()
+    logger.info(
+        f"[{target_key}] 완료 — 이번 run docs +{nd} chunks +{nc} ({mins:.1f}분) · "
+        f"누적 docs {cd} / chunks {cc} (동결 게이트 = 베이스라인 동결셋 카운트와 일치 확인)"
+    )
+
+
+def main() -> None:
+    p = argparse.ArgumentParser(description="Phase 2A 후보 임베딩 백필 (resumable)")
+    p.add_argument("--target", required=True, choices=sorted(TARGETS))  # must be a key of TARGETS
+    p.add_argument("--doc-id-max", type=int, required=True)  # upper bound on documents.id (inclusive)
+    p.add_argument("--chunk-id-max", type=int, required=True)  # upper bound on chunk id (inclusive)
+    p.add_argument("--batch", type=int, default=32)  # rows per embed request and per commit
+    a = p.parse_args()
+    asyncio.run(run(a.target, a.doc_id_max, a.chunk_id_max, a.batch))
+
+
+if __name__ == "__main__":
+    main()
@@ -13,17 +13,27 @@ from sqlalchemy import select, update, delete, exists
 from sqlalchemy.exc import IntegrityError, SQLAlchemyError
 from sqlalchemy.orm import aliased

+from core.config import settings
 from core.database import async_session
 from core.utils import setup_logger
-from models.queue import ProcessingQueue, enqueue_stage
+from models.queue import ProcessingQueue, StageDeferred, enqueue_stage, not_deferred_condition

 logger = setup_logger("queue_consumer")

+# pipeline.held_stages 안내 로그는 1분 사이클마다 반복하지 않고 최초 1회만.
+_hold_logged = False
+
 # stage별 배치 크기
 # stt 는 GPU 단일 점유 + 회의 30분짜리도 가능 → 배치 1. thumbnail 은 ffmpeg subprocess 로 가벼움.
 # deep_summary (PR-B B-1) 는 MLX 26B 단일 Semaphore(1) 경유 → 배치 1.
-BATCH_SIZE = {"extract": 5, "classify": 3, "summarize": 3, "embed": 1, "chunk": 1,
-              "preview": 2, "stt": 1, "thumbnail": 3, "deep_summary": 1, "markdown": 1}
+# fulltext 는 politeness 지연(같은 도메인 5–15s)이 배치 내 직렬로 걸린다 — 배치 3 이면
+# 같은 도메인 최악 ~45s/사이클, 메인 큐 1m 간격(max_instances=1, coalesce)이 흡수.
+# embed/chunk 1→10 (2026-06-12 fast-consumer): 건당 <1s 실측 — Phase 0.1 초기 보수값이
+# LLM 사이클에 인질로 잡혀 실효 ~580/일 vs 수요 최대 2,700/일 → 적체 원인이었음.
+# 10 = TEI/marker 와 GPU 공유 고려한 보수 상향(전용 1분 잡 기준 캡 ~14,400/일).
+BATCH_SIZE = {"extract": 5, "classify": 3, "summarize": 3, "embed": 10, "chunk": 10,
+              "preview": 2, "stt": 1, "thumbnail": 3, "deep_summary": 1, "markdown": 1,
+              "fulltext": 3}
 STALE_THRESHOLD_MINUTES = 10
 # markdown 대형 split 변환은 한 doc 이 수십 분(5210 ≈ 40분) 동안 processing 상태로 머문다.
 # marker_worker 는 queue 행에 heartbeat 를 찍지 않으므로(started_at 고정), main 의 10분
@@ -31,14 +41,21 @@ STALE_THRESHOLD_MINUTES = 10
 # 따라서 markdown consumer 는 별도의 generous 임계를 쓴다.
 MARKDOWN_STALE_THRESHOLD_MINUTES = int(os.getenv("MARKDOWN_STALE_MINUTES", "120"))

-# consume_queue(메인) 가 담당하는 stage. markdown 은 consume_markdown_queue 로 분리.
+# consume_queue(메인) 가 담당하는 stage. markdown 은 consume_markdown_queue,
+# embed/chunk 는 consume_fast_queue (2026-06-12) 로 분리 — 세 집합은 disjoint
+# (reset_stale_items 가 자기 집합만 reset, 교차 시 이중 복구 위험).
 # STT 도 장기 작업 가능성이 있으나 본 PR 범위 밖 — main 에 유지(follow-up).
 MAIN_QUEUE_STAGES = [
-    "extract", "classify", "summarize", "embed", "chunk",
-    "preview", "stt", "thumbnail", "deep_summary",
+    "extract", "classify", "summarize",
+    "preview", "stt", "thumbnail", "deep_summary", "fulltext",
 ]
 MARKDOWN_QUEUE_STAGES = ["markdown"]

+# 고속(비-LLM·경량 GPU) stage — LLM 사이클(분 단위)에서 분리해 1분 잡 전용 소비.
+# embed/chunk 는 건당 <1s 라 main 루프에 두면 classify(~190s×3) 뒤에서 굶는다
+# (2026-06-12 실측: 적체 3,570 · 4070 가동률 0%). markdown 분리(05-01)와 동일 패턴.
+FAST_QUEUE_STAGES = ["embed", "chunk"]
+

 async def reset_stale_items(stages, threshold_minutes=STALE_THRESHOLD_MINUTES):
    """processing 상태로 오래 방치된 항목 복구 (지정 stage 한정)
@@ -137,6 +154,9 @@ async def enqueue_next_stage(document_id: int, current_stage: str):
    # source_channel-aware override (extract stage 만). source_channel 누락 시 _default.
    extract_override_by_channel = {
        "devonagent": ["embed", "chunk"],
+        # crawl 채널 파일형 (KOSHA 첨부/GUIDE PDF 등): preview 사전 캐시 스킵 —
+        # 재료 코퍼스 대량 백필이 preview 큐를 점령하지 않게. classify → embed/chunk/markdown 유지.
+        "crawl": ["classify"],
    }

    next_stages = {
@@ -179,6 +199,7 @@ def _load_workers():
    from workers.summarize_worker import process as summarize_process
    from workers.thumbnail_worker import process as thumbnail_process
    from workers.marker_worker import process as marker_process
+    from workers.fulltext_worker import process as fulltext_process

    return {
        "extract": extract_process,
@@ -195,6 +216,9 @@ def _load_workers():
        # Phase 1B: classify 완료 후 enqueue. PDF→markdown 변환 (leaf, embed/chunk 와 독립).
        # consume_markdown_queue 가 전담 (대형 split 변환이 메인 파이프라인을 막지 않도록).
        "markdown": marker_process,
+        # crawl-24x7 A-2: 기사 페이지 fetch → 4-tier 본문 승격. 후속(summarize/embed/chunk)은
+        # 워커가 직접 enqueue — next_stages dict 미등록 (enqueue_next_stage no-op).
+        "fulltext": fulltext_process,
    }


@@ -206,13 +230,14 @@ async def _process_stage(stage, worker_fn):
    """
    batch_size = BATCH_SIZE.get(stage, 3)

-    # pending 항목 조회
+    # pending 항목 조회 (보류 백오프 deferred_until 미래 항목 제외 — ds-macbook-offload-1)
    async with async_session() as session:
        result = await session.execute(
            select(ProcessingQueue.id, ProcessingQueue.document_id)
            .where(
                ProcessingQueue.stage == stage,
                ProcessingQueue.status == "pending",
+                not_deferred_condition(),
            )
            .order_by(ProcessingQueue.created_at)
            .limit(batch_size)
@@ -266,6 +291,26 @@ async def _process_stage(stage, worker_fn):
            await enqueue_next_stage(document_id, stage)
            logger.info(f"[{stage}] document_id={document_id} 완료")

+        except StageDeferred as defer:
+            # 보류 (ds-macbook-offload-1): 맥북 일시 불가(sleep/cold/editor_busy) — 실패 아님.
+            # attempts 는 claim 시 선증가분을 반환(미소모)하고 deferred_until 백오프 후 자연 재개.
+            # 워커는 완주 전 doc 쓰기를 하지 않으므로 이 시점의 데이터 변경 = 0 (sleep-안전).
+            async with async_session() as session:
+                item = await session.get(ProcessingQueue, queue_id)
+                if not item:
+                    logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
+                    continue
+                item.status = "pending"
+                item.started_at = None
+                item.attempts = max(0, item.attempts - 1)
+                until = datetime.now(timezone.utc) + timedelta(minutes=defer.retry_after_minutes)
+                item.payload = {**(item.payload or {}), "deferred_until": until.isoformat()}
+                await session.commit()
+            logger.info(
+                f"[{stage}] document_id={document_id} 보류({defer}) — "
+                f"{defer.retry_after_minutes}분 후 재개"
+            )
+
        except Exception as e:
            # 실패 처리
            async with async_session() as session:
@@ -304,14 +349,43 @@ async def _process_stage(stage, worker_fn):

 async def consume_queue():
    """Main queue consumer — runs each stage in MAIN_QUEUE_STAGES once, in order.

    Per the original note this covers every stage except markdown and is fired at
    1-minute intervals (the scheduler itself is not visible in this block).
    Stages listed in settings.pipeline_held_stages are skipped, so their pending
    items are intentionally left to accumulate.
    """
+    global _hold_logged
    workers = _load_workers()

+    # Announce held stages only once; _hold_logged is a module-level flag.
+    held = [s for s in MAIN_QUEUE_STAGES if s in settings.pipeline_held_stages]
+    if held and not _hold_logged:
+        logger.info(f"pipeline.held_stages 보류 중: {held} — claim 하지 않음 (pending 적체 = 의도)")
+        _hold_logged = True
+
    # Stale recovery is best-effort: a failure here is logged and must not stop consumption.
    try:
        await reset_stale_items(MAIN_QUEUE_STAGES, STALE_THRESHOLD_MINUTES)
    except Exception:
        logger.exception("stale reset failed, but continuing queue consumption")

    for stage in MAIN_QUEUE_STAGES:
+        if stage in settings.pipeline_held_stages:
+            continue
+        await _process_stage(stage, workers[stage])
+
+
+async def consume_fast_queue():
+    """Dedicated fast consumer for the FAST_QUEUE_STAGES (embed/chunk per the original note).
+
+    Rationale recorded by the author (2026-06-12): in the main loop the LLM stages
+    (classify/summarize/deep) occupy a cycle for minutes, so sub-second embed/chunk
+    items only got one chance per cycle (~60 items/hour effective = the backlog cause).
+    After the split: 1-minute job x batch 10 -> cap of ~600 items/hour. With
+    APScheduler max_instances=1, a batch that runs past one minute makes the next
+    fire coalesce (runaway protection). NOTE(review): scheduler settings and batch
+    size are not visible in this block — confirm against the scheduler registration.
+    """
+    workers = _load_workers()
+
+    # Stale recovery is best-effort: a failure here is logged and must not stop consumption.
+    try:
+        await reset_stale_items(FAST_QUEUE_STAGES, STALE_THRESHOLD_MINUTES)
+    except Exception:
+        logger.exception("fast stale reset failed, but continuing queue consumption")
+
+    for stage in FAST_QUEUE_STAGES:
+        if stage in settings.pipeline_held_stages:
+            continue
        await _process_stage(stage, workers[stage])


@@ -0,0 +1,195 @@
+"""수동 burst-drain CLI — 맥미니 백로그를 사용자가 의도적으로 맥북(M5 Max)으로 소화.
+
+ds-macbook-offload-1 P2-3. 운영 패턴 = csb_collector --bulk 와 동일 (컨테이너 내 실행,
+장기 배치 중 fastapi 재생성 = in-flight 절단이지만 멱등 재실행으로 무손실).
+
+    docker compose exec fastapi python -m workers.queue_drain --stage summarize --limit 200
+
+설계 원칙:
+  - deep 슬롯(config.yaml ai.models.deep) 필수 — 부재 시 명시 종료 (silent 강등 금지)
+  - claim = FOR UPDATE SKIP LOCKED 단건 전이 → consumer(1분 주기)와 이중처리 0
+  - per-item 커밋 = sleep-안전: 중단돼도 완료분 무손상, 진행 중 1건만 stale recovery
+    (10분) 로 pending 복귀. 재실행 멱등 (summarize 는 ai_summary 존재 시 skip)
+  - 보류(StageDeferred = 맥북 sleep/cold/editor_busy/네트워크 플랩): attempts 반환 +
+    deferred_until 백오프 기록. 연속 보류 --defer-retries(기본 5)회까지 --defer-wait
+    (기본 120s) 간격 재시도(분 단위 플랩 흡수), 한도 도달 = sleep 판정으로 run 종료 —
+    불가 상태의 맥북을 계속 두드리지 않는다
+  - 폴백 0: 맥미니/cloud 강등 없음
+"""
+
+import argparse
+import asyncio
+from datetime import datetime, timedelta, timezone
+
+from sqlalchemy import select
+
+from core.config import settings
+from core.database import async_session
+from core.utils import setup_logger
+from models.queue import ProcessingQueue, StageDeferred, not_deferred_condition
+
+logger = setup_logger("queue_drain")
+
+# Stages the drain CLI accepts (also used as the argparse choices for --stage).
+# summarize = the bulk of the Mac mini backlog / deep_summary = deep summary /
+# classify = triage classification.
+# classify joined on 2026-06-12 (fair-share). The earlier reason for excluding it
+# (plan Q-4: "triage is lightweight = fits the Mac mini") assumed Gemma a4b (42 tok/s).
+# After the switch to Qwen 27B, classify became the largest bottleneck because its
+# long-prompt prefill occupies the consumer cycle, so the MacBook (~5x prefill) is
+# the most effective place to offload it.
+# After classify completes, enqueue_next_stage (embed/chunk/markdown) is mandatory —
+# omitting it breaks the DAG.
+DRAIN_STAGES = ("summarize", "deep_summary", "classify")
+
+
+async def _claim_one(stage: str) -> tuple[int, int] | None:
+    """Atomically move one pending item of *stage* to processing.
+
+    Selects the oldest (by created_at) pending row that passes
+    not_deferred_condition(), locking it with FOR UPDATE SKIP LOCKED so a row
+    already locked by another claimer is skipped rather than waited on.
+
+    Returns:
+        (queue_id, document_id) of the claimed row, or None when nothing is claimable.
+    """
+    async with async_session() as session:
+        item = (await session.execute(
+            select(ProcessingQueue)
+            .where(
+                ProcessingQueue.stage == stage,
+                ProcessingQueue.status == "pending",
+                not_deferred_condition(),
+            )
+            .order_by(ProcessingQueue.created_at)
+            .limit(1)
+            .with_for_update(skip_locked=True)
+        )).scalar_one_or_none()
+        if item is None:
+            return None
+        item.status = "processing"
+        item.started_at = datetime.now(timezone.utc)
+        # attempts is incremented up front at claim time; the deferred path gives it back.
+        item.attempts += 1
+        # Read the ids before commit — presumably to avoid touching expired ORM
+        # attributes after the transaction ends (TODO confirm session expire settings).
+        claimed = (item.id, item.document_id)
+        await session.commit()
+    return claimed
+
+
+async def _mark_completed(queue_id: int) -> None:
+    """Set the queue row to completed and stamp completed_at (UTC); no-op if the row is gone."""
+    async with async_session() as db:
+        row = await db.get(ProcessingQueue, queue_id)
+        if not row:
+            return
+        row.completed_at = datetime.now(timezone.utc)
+        row.status = "completed"
+        await db.commit()
+
+
+async def _mark_deferred(queue_id: int, defer: StageDeferred) -> None:
+    """Defer a claimed row: return it to pending, refund the attempt, record a backoff.
+
+    The attempt counted at claim time is given back (never below 0) and
+    payload["deferred_until"] is set to now + defer.retry_after_minutes as an
+    ISO-8601 UTC timestamp. No-op if the row no longer exists.
+    """
+    async with async_session() as db:
+        row = await db.get(ProcessingQueue, queue_id)
+        if not row:
+            return
+        resume_at = datetime.now(timezone.utc) + timedelta(minutes=defer.retry_after_minutes)
+        # Build a fresh payload dict so the existing keys are preserved.
+        merged = dict(row.payload or {})
+        merged["deferred_until"] = resume_at.isoformat()
+        row.status = "pending"
+        row.started_at = None
+        row.attempts = max(0, row.attempts - 1)
+        row.payload = merged
+        await db.commit()
+
+
+async def _mark_failed(queue_id: int, exc: Exception) -> None:
+    """Record a failure on the queue row and apply the retry policy.
+
+    Stores the error text (truncated to 500 chars). When attempts has reached
+    max_attempts the row becomes failed; otherwise it returns to pending with
+    started_at cleared. No-op if the row no longer exists.
+    """
+    async with async_session() as db:
+        row = await db.get(ProcessingQueue, queue_id)
+        if not row:
+            return
+        # Some exceptions stringify to "", so fall back to repr, then the type name.
+        message = str(exc) or repr(exc) or type(exc).__name__
+        row.error_message = message[:500]
+        exhausted = row.attempts >= row.max_attempts
+        if exhausted:
+            row.status = "failed"
+        else:
+            row.status = "pending"
+            row.started_at = None
+        await db.commit()
+
+
+async def drain(stage: str, limit: int, defer_retries: int = 5, defer_wait: int = 120) -> None:
+    """Claim and process pending items of one stage, one at a time, on the deep slot.
+
+    Args:
+        stage: one of DRAIN_STAGES.
+        limit: stop once completed + failed items reach this number (deferred
+            items do not count towards it).
+        defer_retries: number of consecutive StageDeferred results tolerated
+            before the run ends.
+        defer_wait: seconds to sleep between retries after a deferral.
+
+    Raises:
+        SystemExit: for an unknown stage, or when settings.ai.deep is not configured.
+    """
+    if stage not in DRAIN_STAGES:
+        raise SystemExit(f"--stage 는 {DRAIN_STAGES} 만 허용")
+    if settings.ai.deep is None:
+        raise SystemExit(
+            "config.yaml ai.models.deep 슬롯 미구성 — drain 은 맥북 분담 전용 레버라 진행하지 않음"
+            " (맥미니로의 silent 강등 금지)"
+        )
+
+    # Imported here rather than at module top — presumably to avoid import cycles
+    # or heavy worker imports at CLI start (TODO confirm).
+    from workers.classify_worker import process as classify_process
+    from workers.deep_summary_worker import process as deep_summary_process
+    from workers.queue_consumer import enqueue_next_stage
+    from workers.summarize_worker import process as summarize_process
+
+    done = failed = 0
+    deferred = False
+    consecutive_defers = 0
+    while done + failed < limit:
+        claimed = await _claim_one(stage)
+        if claimed is None:
+            logger.info(f"[drain:{stage}] pending 소진 — 종료")
+            break
+        queue_id, document_id = claimed
+        try:
+            async with async_session() as worker_session:
+                if stage == "summarize":
+                    await summarize_process(document_id, worker_session, use_deep=True)
+                elif stage == "classify":
+                    await classify_process(document_id, worker_session, use_deep=True)
+                else:
+                    # deep_summary: drain is a MacBook-only lever — defer when it is
+                    # unavailable (only the consumer falls back)
+                    await deep_summary_process(
+                        document_id, worker_session, defer_on_deep_unavailable=True
+                    )
+                await worker_session.commit()
+            await _mark_completed(queue_id)
+            # Chain the next stage — classify enqueues embed/chunk/markdown (same as the
+            # consumer; summarize/deep_summary are not registered in next_stages, so no-op)
+            await enqueue_next_stage(document_id, stage)
+            done += 1
+            consecutive_defers = 0
+            logger.info(f"[drain:{stage}] {done}/{limit} doc={document_id} 완료")
+        except StageDeferred as defer:
+            # Temporary unavailability comes in two kinds: a real sleep (long) vs a short
+            # network flap (minutes — measured 2026-06-11: a ~10 min flap of the Tailscale
+            # direct path ended a run early at 32/300).
+            # Wait and retry up to the consecutive-defer limit to absorb flaps; once the
+            # limit is reached, end the run (treated as sleep).
+            await _mark_deferred(queue_id, defer)
+            consecutive_defers += 1
+            if consecutive_defers >= defer_retries:
+                deferred = True
+                logger.warning(
+                    f"[drain:{stage}] doc={document_id} 맥북 불가({defer}) — 연속 보류 "
+                    f"{consecutive_defers}회 한도 도달, run 종료. 맥북 깨운 뒤(또는 "
+                    f"{defer.retry_after_minutes}분 후) 재실행"
+                )
+                break
+            logger.warning(
+                f"[drain:{stage}] doc={document_id} 맥북 일시 불가({defer}) — "
+                f"{defer_wait}s 대기 후 재시도 ({consecutive_defers}/{defer_retries})"
+            )
+            await asyncio.sleep(defer_wait)
+        except Exception as exc:
+            # Any other error counts against the limit and goes through the retry policy.
+            await _mark_failed(queue_id, exc)
+            failed += 1
+            logger.error(f"[drain:{stage}] doc={document_id} 실패: {exc}")
+
+    # Exit summary (remaining = pending count at this moment; this count does not
+    # apply not_deferred_condition, so deferred rows are included)
+    async with async_session() as session:
+        from sqlalchemy import func as sa_func
+        remaining = (await session.execute(
+            select(sa_func.count()).select_from(ProcessingQueue).where(
+                ProcessingQueue.stage == stage, ProcessingQueue.status == "pending",
+            )
+        )).scalar_one()
+    logger.info(
+        f"[drain:{stage}] 요약 — 완료 {done} · 실패 {failed} · "
+        f"보류종료 {'예' if deferred else '아니오'} · 잔여 pending {remaining}"
+    )
+
+
+def main() -> None:
+    """CLI entry point: parse the burst-drain options and run drain() to completion."""
+    cli = argparse.ArgumentParser(description="맥북(M5 Max) burst-drain — 수동 백로그 분담 레버")
+    cli.add_argument("--stage", required=True, choices=DRAIN_STAGES)
+    cli.add_argument("--limit", type=int, default=50, help="이번 run 최대 처리 건수 (기본 50)")
+    cli.add_argument(
+        "--defer-retries",
+        type=int,
+        default=5,
+        help="연속 보류 허용 횟수 — 네트워크 플랩 흡수 (기본 5, 한도 도달 시 종료)",
+    )
+    cli.add_argument(
+        "--defer-wait",
+        type=int,
+        default=120,
+        help="보류 재시도 간 대기 초 (기본 120)",
+    )
+    opts = cli.parse_args()
+    asyncio.run(
+        drain(
+            opts.stage,
+            opts.limit,
+            defer_retries=opts.defer_retries,
+            defer_wait=opts.defer_wait,
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,265 @@
+"""C-3 공학 정적 코퍼스 1회 일괄 ingest (plan crawl-24x7-1).
+
+National Board 기술 아티클(~86, ASP.NET 구식 — 기사 앵커가 싱글쿼트 href) +
+TWI Job Knowledge(~153, sitemap 기반). 지속 크롤링이 아니라 아카이브 일괄 +
+저빈도 증분 유형 — 스케줄러 미등록, 수동 CLI:
+
+  docker exec hyungi_document_server-fastapi-1 \
+      python -m workers.static_corpus_ingest --corpus all --limit 3   # 검증용
+  docker exec -d hyungi_document_server-fastapi-1 \
+      python -m workers.static_corpus_ingest --corpus all             # 전체 (~45분)
+
+  ※ -d 백그라운드 실행 시 중단은 host pkill 이 아니라 컨테이너 내부 PID kill
+    ([[feedback_docker_exec_orphan_kill]]).
+
+멱등: edit_url(정규화)+file_hash dedup — 재실행 = 신규분만 (그대로 monthly 증분 절차).
+politeness: fetch_page 재사용 (per-domain 1 + 5~15s jitter + robots).
+원본 보존·승격 필드: fulltext_worker 와 동일 규약 (재추출 가능 상태 유지).
+실패는 degrade 없이 skip + 말미 목록 출력 (정적 코퍼스 — RSS 요약 같은 격하 대상 부재).
+"""
+
+import argparse
+import asyncio
+import hashlib
+import re
+from datetime import datetime, timezone
+from html import unescape
+
+from sqlalchemy import select
+
+from core.crawl_politeness import CrawlBlocked, CrawlFetchError, CrawlSkip, fetch_page
+from core.database import async_session
+from core.utils import setup_logger
+from models.document import Document
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.fulltext_worker import (
+    _WEB_MIN_BODY_LEN,
+    _extract_body,
+    _raw_html_path,
+    _save_raw_html,
+    _strip_article_footer,
+)
+from workers.news_collector import _article_hash, _normalize_url
+
+logger = setup_logger("static_corpus")
+
+_NB_LISTING = "https://www.nationalboard.org/Index.aspx?pageID=164"
+_TWI_SITEMAP = "https://www.twi-global.com/sitemap.xml"
+
+
+async def _discover_national_board() -> list[str]:
+    """Collect article URLs from the National Board listing page.
+
+    Article anchors on that page use single-quoted href by default, so the
+    pattern accepts either quote character. Returns one canonical URL per
+    distinct article ID, ordered by numeric ID.
+    """
+    html_text, _ = await fetch_page(_NB_LISTING)
+    anchor_pattern = r"href=['\"]/?Index\.aspx\?pageID=164&(?:amp;)?ID=(\d+)['\"]"
+    article_ids: set[int] = set()
+    for raw_id in re.findall(anchor_pattern, html_text):
+        article_ids.add(int(raw_id))
+    return [
+        f"https://www.nationalboard.org/Index.aspx?pageID=164&ID={article_id}"
+        for article_id in sorted(article_ids)
+    ]
+
+
+async def _discover_twi() -> list[str]:
+    """Collect job-knowledge article URLs from the TWI sitemap.
+
+    Only the job-knowledge series is taken (faqs/published-papers are noted by
+    the author as future incremental candidates). The series index URL itself
+    is dropped. Returns the distinct URLs in sorted order.
+    """
+    xml_text, _ = await fetch_page(
+        _TWI_SITEMAP,
+        content_types=("text/xml", "application/xml", "text/html"),
+    )
+    loc_pattern = (
+        r"<loc>(https://www\.twi-global\.com/technical-knowledge/job-knowledge/[^<]+)</loc>"
+    )
+    articles: set[str] = set()
+    for loc in re.findall(loc_pattern, xml_text):
+        if loc.rstrip("/").endswith("job-knowledge"):
+            continue  # the series landing page, not an article
+        articles.add(loc)
+    return sorted(articles)
+
+
+# Registry of static corpora, keyed by the --corpus CLI value.
+#   source_name  — NewsSource.name used to look up / create the registry row
+#   listing_url  — stored as the row's feed_url when the row is created
+#   discover     — coroutine function returning the list of article URLs
+#   fetch_method — stored as the row's fetch_method when the row is created
+CORPORA = {
+    "national-board": {
+        "source_name": "National Board 기술 아티클",
+        "listing_url": _NB_LISTING,
+        "discover": _discover_national_board,
+        "fetch_method": "page",
+    },
+    "twi": {
+        "source_name": "TWI Job Knowledge",
+        "listing_url": _TWI_SITEMAP,
+        "discover": _discover_twi,
+        "fetch_method": "sitemap+page",
+    },
+}
+
+
+async def _get_or_create_source(session, spec: dict) -> NewsSource:
+    """Return the NewsSource registry row for *spec*, creating it when absent.
+
+    Per the original note the row serves source tracking, the crawl_raw src_{id}
+    path and A-8 panel visibility. New rows are created with enabled=False so
+    they stay out of the 6h news cycle (a static corpus has no feed — increments
+    come from re-running this CLI). A new row is flushed, not committed; the
+    caller owns the commit.
+    """
+    lookup = select(NewsSource).where(NewsSource.name == spec["source_name"])
+    source = (await session.execute(lookup)).scalars().first()
+    if source is not None:
+        return source
+
+    # Country is inferred from the source name: "national" -> US, anything else -> GB.
+    is_national_board = "national" in spec["source_name"].lower()
+    source = NewsSource(
+        name=spec["source_name"],
+        feed_url=spec["listing_url"],
+        feed_type="rss",
+        fetch_method=spec["fetch_method"],
+        fulltext_policy="none",
+        source_channel="crawl",
+        category="Engineering",
+        language="en",
+        country="US" if is_national_board else "GB",
+        enabled=False,
+    )
+    session.add(source)
+    await session.flush()
+    return source
+
+
+def _page_title(html_text: str, fallback: str) -> str:
+    """Extract a page title: og:title first, then <title>, else *fallback*.
+
+    HTML entities are unescaped and a trailing site suffix (' - TWI',
+    '| National Board ...') is removed. An empty result yields *fallback*.
+    """
+    match = re.search(r'<meta\s+property="og:title"\s+content="([^"]+)"', html_text)
+    if match is None:
+        match = re.search(r"<title[^>]*>([^<]+)</title>", html_text, re.I)
+    if match is None:
+        raw_title = ""
+    else:
+        raw_title = unescape(match.group(1)).strip()
+    # Strip the site-name tail (TWI uses ' - TWI', NB appends 'National Board ...').
+    cleaned = re.sub(r"\s*[-|·]\s*(TWI|National Board[^-|]*)\s*$", "", raw_title).strip()
+    return cleaned if cleaned else fallback
+
+
+async def _ingest_one(session, source: NewsSource, url: str) -> str:
+    """Ingest one article URL into Document and enqueue its follow-up stages.
+
+    Returns:
+        'ok'   — a Document was added (flushed, not committed) and summarize/embed/chunk enqueued
+        'dup'  — a Document with the same edit_url (normalized or raw) or file_hash already exists
+        'skip' — fetch was blocked/failed, or the extracted body was too short
+    """
+    # Dedup 1: by URL, before spending a fetch.
+    normalized_url = _normalize_url(url)
+    existing = await session.execute(
+        select(Document).where(Document.edit_url.in_([normalized_url, url])).limit(1)
+    )
+    if existing.scalars().first():
+        return "dup"
+
+    try:
+        html_text, final_url = await fetch_page(url)
+    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+        logger.warning(f"[{source.name}] fetch 실패 skip: {url} — {type(e).__name__}: {e}")
+        return "skip"
+
+    # A falsy engine is treated as "extraction failed" (see the log message below).
+    body, engine, engine_ver = _extract_body(html_text)
+    if not engine:
+        logger.warning(f"[{source.name}] 추출 실패 skip (< {_WEB_MIN_BODY_LEN}자): {url}")
+        return "skip"
+    # NUL characters are removed before footer stripping — presumably because the
+    # database text column rejects them (TODO confirm).
+    clean_body = _strip_article_footer(body.replace("\x00", ""))
+    if len(clean_body) < _WEB_MIN_BODY_LEN:
+        logger.warning(f"[{source.name}] 푸터 제거 후 본문 부족 skip: {url}")
+        return "skip"
+
+    # Dedup 2: by hash of (title, "static", source name) — used as file_hash.
+    title = _page_title(html_text, fallback=url.rsplit("/", 1)[-1][:90])
+    article_id = _article_hash(title, "static", source.name)
+    dup2 = await session.execute(
+        select(Document).where(Document.file_hash == article_id).limit(1)
+    )
+    if dup2.scalars().first():
+        return "dup"
+
+    # Preserve the raw HTML; an OSError here is logged and ingest continues without it.
+    now = datetime.now(timezone.utc)
+    raw_path = _raw_html_path(source.id, article_id, now)
+    raw_saved = True
+    try:
+        _save_raw_html(raw_path, html_text)
+    except OSError as e:
+        raw_saved = False
+        logger.error(f"[{source.name}] 원본 보존 실패 (ingest 는 진행): {e}")
+
+    doc = Document(
+        file_path=f"crawl/{source.name}/{article_id}",
+        file_hash=article_id,
+        file_format="article",
+        file_size=0,  # recomputed below once extracted_text is final
+        file_type="note",
+        title=title,
+        extracted_text=f"{title}\n\n{clean_body}",
+        extracted_at=now,
+        extractor_version=f"static+page@{engine}",
+        md_content=clean_body,
+        md_status="success",
+        md_extraction_engine=engine,
+        md_extraction_engine_version=engine_ver,
+        md_format_version="1.0",
+        md_generated_at=now,
+        md_source_hash=hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest(),
+        md_content_hash=hashlib.sha256(clean_body.encode("utf-8")).hexdigest(),
+        content_origin="extracted",
+        source_channel="crawl",
+        data_origin="external",
+        edit_url=normalized_url,
+        review_status="approved",
+        ai_domain="Engineering",
+        ai_sub_group=source.name,
+        ai_tags=[f"Engineering/{source.name}"],
+        extract_meta={
+            "source_id": source.id,
+            "source_name": source.name,
+            "published_at": None,  # static corpus — page publish date is not trusted; indexing is unconditional via the channel gate
+            "fulltext": {
+                "status": "static_corpus",
+                "engine": engine,
+                "final_url": final_url,
+                "raw_html_path": str(raw_path) if raw_saved else None,
+                "body_chars": len(clean_body),
+                "resolved_at": now.isoformat(),
+            },
+        },
+    )
+    # file_size = byte length of the stored text (default encoding of str.encode()).
+    doc.file_size = len(doc.extracted_text.encode())
+    session.add(doc)
+    # Flush so doc.id is assigned before it is used for enqueueing.
+    await session.flush()
+
+    # crawl channel = index everything regardless of publish date (summarize goes to
+    # the Mac mini queue — D-4 lag observation target)
+    await enqueue_stage(session, doc.id, "summarize")
+    await enqueue_stage(session, doc.id, "embed")
+    await enqueue_stage(session, doc.id, "chunk")
+    logger.info(f"[{source.name}] ingest {len(clean_body)}자 ({engine}): {title[:60]}")
+    return "ok"
+
+
+async def run(corpus: str = "all", limit: int = 0) -> None:
+    """Ingest one corpus (a CORPORA key) or all of them.
+
+    Args:
+        corpus: a key of CORPORA, or "all" for every registered corpus.
+        limit: per-corpus cap on discovered URLs; 0 means no cap.
+
+    For each corpus: ensure the registry row exists, discover URLs, ingest them
+    one by one, then log the counts and the list of skipped URLs. A failed
+    discovery skips that corpus and continues with the next.
+    """
+    targets = list(CORPORA) if corpus == "all" else [corpus]
+    for key in targets:
+        spec = CORPORA[key]
+        async with async_session() as session:
+            source = await _get_or_create_source(session, spec)
+            await session.commit()
+            # Keep only the id; the row is re-loaded inside each per-URL session below.
+            source_id = source.id
+
+        try:
+            urls = await spec["discover"]()
+        except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
+            logger.error(f"[{spec['source_name']}] 목록 수집 실패 — corpus 건너뜀: {e}")
+            continue
+        if limit:
+            urls = urls[:limit]
+        logger.info(f"[{spec['source_name']}] 대상 {len(urls)}건 (limit={limit or '없음'})")
+
+        counts = {"ok": 0, "dup": 0, "skip": 0}
+        failed: list[str] = []
+        for i, url in enumerate(urls, 1):
+            # One session + one commit per URL, so an interrupted long batch keeps
+            # everything ingested so far. (Only the progress log is every 10 URLs.)
+            async with async_session() as session:
+                src = await session.get(NewsSource, source_id)
+                status = await _ingest_one(session, src, url)
+                await session.commit()
+            counts[status] += 1
+            if status == "skip":
+                failed.append(url)
+            if i % 10 == 0:
+                logger.info(f"[{spec['source_name']}] 진행 {i}/{len(urls)} {counts}")
+
+        logger.info(f"[{spec['source_name']}] 완료: {counts}")
+        if failed:
+            logger.warning(
+                f"[{spec['source_name']}] skip {len(failed)}건 — 재시도는 CLI 재실행(멱등):\n  "
+                + "\n  ".join(failed)
+            )
+
+
+# Script entry: `python -m workers.static_corpus_ingest [--corpus KEY|all] [--limit N]`.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="C-3 정적 코퍼스 일괄 ingest")
+    parser.add_argument("--corpus", choices=[*CORPORA, "all"], default="all")
+    parser.add_argument("--limit", type=int, default=0, help="corpus 당 상한 (0=전체)")
+    args = parser.parse_args()
+    asyncio.run(run(args.corpus, args.limit))
@@ -0,0 +1,43 @@
+"""statute_collector 나라별 어댑터 패키지 (plan safety-library-1 B-1).
+
+어댑터 계약 (2함수 + 상수):
+  JURISDICTION: str  — 어댑터 상수 고정. 코어가 적재 직전 assert (파싱 결과 추론 금지).
+  poll_changes(client, watch_rows) -> list[ChangeEvent]  — 개정 감지만 (경량 호출).
+  fetch_version(client, act, change) -> list[VersionPayload]  — PR②.
+    payload 리스트: primary + annex 각각 자기 version_key (R4-M4).
+
+ChangeEvent.kind: amend / repeal / bootstrap(합성 — PR② 부트스트랩이 amend 와
+동일 ingest 경로 재사용, R6-m2).
+"""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ChangeEvent:
+    """Amendment-detection event — the output of poll_changes."""
+    family_id: str
+    kind: str                    # amend / repeal / bootstrap
+    new_version_key: str         # KR = MST (법령일련번호, the law serial number)
+    title: str
+    promulgation_date: str | None = None   # YYYYMMDD
+    effective_date: str | None = None      # YYYYMMDD (effective date from the list response — beware per-article staggered enforcement)
+    revision_type: str | None = None       # 제개정구분명 (revision-type name)
+
+
+@dataclass
+class VersionPayload:
+    """One fetch_version result — a primary or an annex, each with its own version_key (R4-M4).
+
+    Full-text single-call snapshot semantics (R7-M3 fixture decision): the primary and
+    all annexes are produced from one response.
+    annex version_key = 'MST|{annex number}-{annex branch number}' (the zero-padded
+    structured fields as-is — built from fields, not by parsing a suffix string;
+    R7-B1 a upgrade).
+    NOTE(review): the KR adapter visible in this change also prefixes the annex kind
+    (별표/서식) before the number — confirm which format is authoritative.
+    """
+    law_doc_kind: str            # primary / annex
+    version_key: str
+    title: str
+    content: str                 # markdown text of the articles / annex
+    promulgation_date: str | None = None   # YYYYMMDD (from the body's basic info)
+    effective_date: str | None = None      # YYYYMMDD (from the body's basic info — may differ from the list value)
+    annex_label: str | None = None         # '별표1' / '별표5의2' (display only)
+    meta: dict = field(default_factory=dict)
@@ -0,0 +1,213 @@
+"""KR 법령 어댑터 — 국가법령정보센터 (law.go.kr DRF) (plan safety-library-1 B-1 PR①).
+
+poll_changes = lawSearch 목록 diff: 워치리스트 행별 정식 법령명 exact 조회 →
+MST(법령일련번호) != watermark 이면 ChangeEvent. law_monitor 의 검증된 호출 형태 재사용.
+
+fixture (2026-06-13 라이브 박제, tests/fixtures/statute_kr/):
+  - lawsearch_*.xml — 목록 필드: 법령ID(불변)·법령일련번호(MST)·공포일자·시행일자·제개정구분명
+  - lawservice_*.xml.gz — 전문 1콜 XML: 조문단위 853(산안기준규칙) + 별표단위 23 전부 포함
+    = 스냅샷 의미론 확정(R7-M3 ①: annex 부분 fetch 실패 개념 없음 — 같은 응답에 없는
+    별표 = 삭제 간주 가능). 별표번호+별표가지번호 = 구조화 필드(R7-M3 ② — suffix 문자열
+    파싱 불요, version_key 합성은 이 필드 기반. PR② fetch_version 소관).
+  - 조문 취득 방식 판정(R2-m1): 전문 1콜 + 로컬 파싱 확정 — lawjosub 조 단위 호출이면
+    산안기준규칙(853조)은 개정당 호출 폭증. lawjosub fixture 는 보조 박제.
+
+주의: 응답의 '법령상세링크' 필드에 OC 키가 포함됨 — fixture/로그에 raw 응답을 남길 때
+새니타이즈 필수 (repo fixture 는 __OC_REDACTED__ 처리됨).
+"""
+
+import asyncio
+import os
+import xml.etree.ElementTree as ET
+
+import httpx
+
+from core.crawl_politeness import CRAWL_UA
+from core.utils import setup_logger
+from workers.statute_adapters import ChangeEvent, VersionPayload
+
+logger = setup_logger("statute_kr")
+
+# Adapter constants: jurisdiction code and the upstream API identifier.
+JURISDICTION = "KR"
+SOURCE_API = "law.go.kr"
+
+# DRF endpoints: list search (used by poll_changes) and full text (used by fetch_version).
+LAW_SEARCH_URL = "https://www.law.go.kr/DRF/lawSearch.do"
+LAW_SERVICE_URL = "https://www.law.go.kr/DRF/lawService.do"
+
+# Delay between consecutive calls to the same domain (once a day x 26 calls — conservative)
+_POLL_DELAY_S = 1.5
+
+
+def _oc() -> str:
+    """Return the law.go.kr OC key from the LAW_OC environment variable.
+
+    Raises:
+        RuntimeError: when LAW_OC is unset or empty.
+    """
+    key = os.environ.get("LAW_OC") or ""
+    if key:
+        return key
+    raise RuntimeError("LAW_OC 미설정 — statute KR 어댑터 사용 불가")
+
+
+def parse_search_hit(xml_text: str, official_title: str) -> dict | None:
+    """Extract the one exact-title match from a lawSearch XML response (pure function).
+
+    Matching is an exact comparison against the official law name; the author's
+    note stresses that the lookup key must be the registry's official name
+    (including the middle dot), because a hard-coded variant without it never
+    matched in the older law_monitor.
+
+    Returns:
+        A dict with mst, law_id, promulgation_date, effective_date, revision_type
+        and status_code (empty optional fields become None), or None when no
+        <law> element has both the exact title and a non-empty MST.
+    """
+    def _text(node: ET.Element, tag: str) -> str:
+        # Missing element and empty element both collapse to "".
+        return (node.findtext(tag) or "").strip()
+
+    for law in ET.fromstring(xml_text).findall(".//law"):
+        if _text(law, "법령명한글") != official_title:
+            continue
+        serial = _text(law, "법령일련번호")
+        if not serial:
+            continue
+        return {
+            "mst": serial,
+            "law_id": _text(law, "법령ID"),
+            "promulgation_date": _text(law, "공포일자") or None,
+            "effective_date": _text(law, "시행일자") or None,
+            "revision_type": _text(law, "제개정구분명") or None,
+            "status_code": _text(law, "현행연혁코드") or None,
+        }
+    return None
+
+
+def detect_change(hit: dict | None, act_family_id: str, act_title: str,
+                  watermark: str | None) -> ChangeEvent | None:
+    """Turn a list hit plus the stored watermark into a ChangeEvent (pure function).
+
+    - no hit: returns None (the caller logs loudly). A missing hit is never
+      treated as a repeal, since it cannot be told apart from a search miss or
+      a renamed title — repeal is decided from the revision-type name only.
+    - MST equal to the watermark: no change, returns None.
+    - revision-type name containing '폐지': kind 'repeal'; otherwise 'amend'.
+    """
+    if hit is None:
+        return None
+    serial = hit["mst"]
+    if watermark and serial == watermark:
+        return None
+    revision = hit.get("revision_type")
+    is_repeal = "폐지" in (revision or "")
+    return ChangeEvent(
+        family_id=act_family_id,
+        kind="repeal" if is_repeal else "amend",
+        new_version_key=serial,
+        title=act_title,
+        promulgation_date=hit.get("promulgation_date"),
+        effective_date=hit.get("effective_date"),
+        revision_type=revision,
+    )
+
+
+def _article_markdown(art: ET.Element) -> str:
+    """Render one 조문단위 (article unit) element as plain text.
+
+    Output = the 조문내용 text followed by every text node under each direct
+    항 child (paragraph subtree), one non-empty stripped fragment per line.
+    Other metadata children of the article are not included.
+    """
+    heading = (art.findtext("조문내용") or "").strip()
+    chunks = [heading] if heading else []
+    for paragraph in art.findall("항"):
+        fragments = (piece.strip() for piece in paragraph.itertext())
+        rendered = "\n".join(piece for piece in fragments if piece)
+        if rendered:
+            chunks.append(rendered)
+    return "\n".join(chunks)
+
+
+def parse_service_payloads(xml_text: str, official_title: str, mst: str) -> list[VersionPayload]:
+    """Convert a lawService full-text XML into a list of VersionPayload (pure function).
+
+    Snapshot semantics: the annexes present in the response are the complete set
+    for that version (R7-M3 fixture decision).
+    - 1 primary: markdown of all articles (rows whose 조문여부 is not '조문' are
+      chapter/section headers and are emitted as '## ' headings)
+    - N annexes: one per 별표단위 — version_key = 'MST|{kind}{number}-{branch}'
+      with the number fields kept zero-padded as received
+
+    The first element of the returned list is always the primary payload.
+    """
+    root = ET.fromstring(xml_text)
+    base = root.find(".//기본정보")
+    # Dates come from the basic-info element; both are None when it is absent or empty.
+    prom = (base.findtext("공포일자") or "").strip() or None if base is not None else None
+    eff = (base.findtext("시행일자") or "").strip() or None if base is not None else None
+
+    lines: list[str] = [f"# {official_title}", ""]
+    for art in root.findall(".//조문단위"):
+        is_article = (art.findtext("조문여부") or "").strip() == "조문"
+        text = _article_markdown(art)
+        if not text:
+            continue
+        if is_article:
+            # Article text that already starts with '제' ("제N조 ...") is kept as-is;
+            # anything else gets a '### ' heading prefix.
+            lines.append(f"### {text}" if not text.startswith("제") else text)
+        else:
+            lines.append(f"## {text}")
+        lines.append("")
+    primary_content = "\n".join(lines).strip()
+
+    payloads = [VersionPayload(
+        law_doc_kind="primary",
+        version_key=mst,
+        title=official_title,
+        content=primary_content,
+        promulgation_date=prom,
+        effective_date=eff,
+    )]
+
+    for annex in root.findall(".//별표단위"):
+        no = (annex.findtext("별표번호") or "").strip()
+        sub = (annex.findtext("별표가지번호") or "").strip() or "00"
+        kind = (annex.findtext("별표구분") or "별표").strip()   # 별표 (annex) / 서식 (form) — a separate dimension!
+        a_title = (annex.findtext("별표제목") or "").strip()
+        a_body = (annex.findtext("별표내용") or "").strip()
+        if not no:
+            continue
+        # Deletion tombstone — in KR a deleted annex/form is not absent but an explicit
+        # '삭제 <date>' row (observed in the fixture: forms 1 and 2 of the OSH standards
+        # rule). A tombstone without content is not loaded.
+        # Handling of an older version left as "current" in the series = PR③ observation
+        # candidate (inferring deletion from absence is confirmed unnecessary).
+        if a_title.startswith("삭제") and len(a_body) < 50:
+            continue
+        # Display label: int() drops the zero padding; a non-zero branch adds '의N'.
+        # NOTE(review): int() raises ValueError on a non-numeric number field — assumes
+        # the API always sends digits; confirm.
+        label = f"{kind}{int(no)}" + (f"의{int(sub)}" if sub not in ("", "0", "00") else "")
+        payloads.append(VersionPayload(
+            law_doc_kind="annex",
+            # The kind dimension is included — (number, branch) alone makes 별표1 and
+            # 서식1 collide (observed in the fixture)
+            version_key=f"{mst}|{kind}{no}-{sub}",
+            title=f"{official_title} {label} {a_title}".strip(),
+            content=f"# {official_title} {label}\n## {a_title}\n\n{a_body}".strip(),
+            promulgation_date=prom,
+            effective_date=eff,
+            annex_label=label,
+        ))
+    return payloads
+
+
+async def fetch_version(client: httpx.AsyncClient, act, change: ChangeEvent) -> list[VersionPayload]:
+    """Fetch the full text of one law version in a single call and parse it into payloads.
+
+    Per the R2-m1 decision, per-article lawjosub calls are not used (avoids a call
+    explosion for the 853-article rule).
+
+    Raises:
+        httpx.HTTPStatusError: on a non-success response (from raise_for_status).
+        ValueError: when the parsed primary content is shorter than 200 characters —
+            the parse-validation floor; per the original note, raising keeps the
+            watermark unpersisted so the next poll can retry.
+    """
+    query = {"OC": _oc(), "target": "law", "MST": change.new_version_key, "type": "XML"}
+    resp = await client.get(
+        LAW_SERVICE_URL,
+        params=query,
+        headers={"User-Agent": CRAWL_UA},
+    )
+    resp.raise_for_status()
+    payloads = parse_service_payloads(resp.text, act.title, change.new_version_key)
+    too_thin = not payloads or len(payloads[0].content) < 200
+    if too_thin:
+        raise ValueError(f"전문 파싱 결과 빈약 ({act.family_id}): payloads={len(payloads)}")
+    return payloads
+
+
+async def poll_changes(client: httpx.AsyncClient, watch_rows: list) -> list[ChangeEvent]:
+    """Diff the lawSearch list against each watchlist row and collect change events.
+
+    Failures are isolated per row: an error on one law is logged and does not stop
+    the remaining rows. A pause of _POLL_DELAY_S follows every row.
+
+    Args:
+        client: shared httpx async client.
+        watch_rows: rows exposing .title, .family_id and .watermark.
+
+    Returns:
+        One ChangeEvent per row whose list MST differs from its watermark.
+
+    Raises:
+        RuntimeError: when LAW_OC is not configured (raised before any request).
+    """
+    from urllib.parse import quote  # local import: only needed for log redaction
+
+    oc = _oc()
+    # The OC key travels in the query string, and httpx embeds the request URL in
+    # HTTPStatusError messages. Mask the raw and percent-encoded forms before logging
+    # so the key never reaches the logs (same placeholder as the repo fixtures).
+    masked_forms = {oc, quote(oc, safe="")}
+    events: list[ChangeEvent] = []
+    for act in watch_rows:
+        try:
+            resp = await client.get(
+                LAW_SEARCH_URL,
+                params={"OC": oc, "target": "law", "type": "XML", "query": act.title},
+                headers={"User-Agent": CRAWL_UA},
+            )
+            resp.raise_for_status()
+            hit = parse_search_hit(resp.text, act.title)
+            if hit is None:
+                # fail-loud: an official-name miss suggests a renamed title or a search
+                # gap — never skip silently
+                logger.warning(f"[statute-kr] 목록 미매칭: {act.family_id} {act.title!r}")
+            else:
+                ev = detect_change(hit, act.family_id, act.title, act.watermark)
+                if ev:
+                    events.append(ev)
+        except Exception as e:
+            # Deliberately broad: per-row isolation is the contract of this loop.
+            detail = repr(e)
+            for secret in masked_forms:
+                detail = detail.replace(secret, "__OC_REDACTED__")
+            logger.error(f"[statute-kr] poll 실패 ({act.family_id}): {type(e).__name__}: {detail}")
+        await asyncio.sleep(_POLL_DELAY_S)
+    return events
@@ -0,0 +1,381 @@
+"""statute_collector — 법령 수집 코어 (plan safety-library-1 B-1, PR②).
+
+구성 (잡 코드 통째 — R8-B1: 승격과 스윕의 PR 분리 = 배포 갭 이중 노출 윈도):
+  poll_changes(어댑터) → fetch_version(전문 1콜, payload 리스트) → ingest(전 버전
+  pending 적재 + 4축 주입) → 생애주기 잡(버전 시리즈 단위 승격·supersede + 상태 기반
+  레거시 스윕 + repeal — 단일 트랜잭션, KST 기준).
+
+핵심 계약 (카드 = 스펙):
+  - 워터마크 영속 = ingest 파싱 검증 통과 후에만 (실패 시 다음 폴링이 재감지)
+  - 승격·supersede 단위 = 버전 시리즈 = (family_id, law_doc_kind, annex 식별자)
+    — R7-B1: family 단위 구현 금지 (annex 승격이 primary 를 소거하는 본문 소실 경로)
+  - 레거시 스윕 = 상태 기반: 매 잡 실행, primary 시리즈 current 보유 + repeal 미감지
+    family 의 법령명 매핑 레거시(law_monitor 스냅샷) 청크 in_corpus=false (멱등)
+  - 매핑 = 정확 일치 가정 금지: title 의 '법령명 (YYYYMMDD)' 패턴에서 법령명 추출 후
+    정규화(공백·가운뎃점 변형 흡수) **동등** 비교 — prefix 비교 금지 ('산업안전보건법'이
+    '산업안전보건법 시행령' 레거시를 오폭하는 경로 차단)
+  - ingest 4축 (R8-M1): material_type='law' / jurisdiction=어댑터 상수 /
+    published_date=COALESCE(시행일, 공포일) / license=public_domain(저작권법 제7조)
+  - 부트스트랩(--bootstrap) = kind='bootstrap' 합성 이벤트, amend 와 동일 경로 +
+    extract_meta.backfill=true (E-1 게이트 집계 제외 마커)
+  - 가시성: source_health 성공/실패 기록 (HC.io 는 2026-05-30 알림 레이어 폐기로 부재 —
+    silent-skip 가드 정신은 crawl-health 보드 + health 행으로 대체)
+
+실행:
+  스케줄 = daily 07:00 KST (main.py — 구 law_monitor 슬롯 승계)
+  수동   = docker compose exec -T fastapi python -m workers.statute_collector [--bootstrap]
+"""
+
+import argparse
+import asyncio
+import hashlib
+import re
+import unicodedata
+from datetime import date, datetime, timezone
+from zoneinfo import ZoneInfo
+
+import httpx
+from sqlalchemy import select, update
+
+from core.database import async_session
+from core.utils import setup_logger
+from models.chunk import DocumentChunk
+from models.document import Document
+from models.legal_act import LegalAct, LegalMeta
+from models.news_source import NewsSource
+from models.queue import enqueue_stage
+from workers.news_collector import _get_or_create_health, _record_failure, _record_success
+from workers.statute_adapters import ChangeEvent, VersionPayload
+from workers.statute_adapters import kr
+
+logger = setup_logger("statute_collector")
+
+_KST = ZoneInfo("Asia/Seoul")
+_SOURCE_NAME = "KR 법령 (law.go.kr)"
+_LICENSE = {"scheme": "public_domain", "redistribute": True, "attribution": "국가법령정보센터"}
+_FETCH_DELAY_S = 2.5   # lawService 전문(최대 ~1.3MB) 연속 호출 간격
+
+# jurisdiction → 어댑터 모듈 (Phase 1 = KR 단독, 해외는 B-5 게이트 뒤)
+_ADAPTERS = {"KR": kr}
+
+
+# ─── 법령명 매핑 (R8-m1: 정확 일치 가정 금지 — 변형 흡수 정규화 + 동등 비교) ───
+
+_LEGACY_TITLE_RE = re.compile(r"^(.*?)\s*\((\d{8})\)")
+
+
+def normalize_law_name(name: str) -> str:
+    """공백·가운뎃점 변형 흡수 — NFC 정규화 후 공백/ㆍ·・ 제거."""
+    s = unicodedata.normalize("NFC", name or "")
+    return re.sub(r"[\sㆍ·・]", "", s)
+
+
+def legacy_law_name(title: str) -> str | None:
+    """레거시 law_monitor title('법령명 (YYYYMMDD) 섹션')에서 법령명 추출."""
+    m = _LEGACY_TITLE_RE.match(title or "")
+    return m.group(1).strip() if m else None
+
+
+def series_suffix(version_key: str) -> str | None:
+    """버전 시리즈의 annex 식별자 — version_key 'MST|NNNN-SS' 의 '|' 뒤 (primary=None)."""
+    return version_key.split("|", 1)[1] if "|" in version_key else None
+
+
+def _to_date(ymd: str | None) -> date | None:
+    digits = re.sub(r"\D", "", ymd or "")
+    if len(digits) != 8:
+        return None
+    try:
+        return date(int(digits[:4]), int(digits[4:6]), int(digits[6:8]))
+    except ValueError:
+        return None
+
+
+# ─── ingest (전 버전 pending 적재 — R2-B2/R3 계약) ──────────────────────────────
+
+async def _ingest_payload(session, act: LegalAct, ev: ChangeEvent,
+                          payload: VersionPayload, backfill: bool) -> bool:
+    """payload 1건 → Document + legal_meta(pending). 반환 = 신규 여부 (dedup 멱등)."""
+    fhash = hashlib.sha256(
+        f"statute|{act.jurisdiction}|{act.native_id}|{payload.version_key}".encode()
+    ).hexdigest()[:32]
+    existing = await session.execute(
+        select(Document.id).where(Document.file_hash == fhash).limit(1)
+    )
+    if existing.scalars().first():
+        return False
+
+    prom = _to_date(payload.promulgation_date or ev.promulgation_date)
+    eff = _to_date(payload.effective_date or ev.effective_date)
+    now = datetime.now(timezone.utc)
+    extra = {"backfill": True} if backfill else {}
+    doc = Document(
+        file_path=f"crawl/statute/{act.family_id}/{payload.version_key.replace('|', '_')}",
+        file_hash=fhash,
+        file_format="article",
+        file_size=len(payload.content.encode()),
+        file_type="note",
+        title=f"{payload.title} ({payload.promulgation_date or ev.promulgation_date or ''})".strip(),
+        extracted_text=payload.content,
+        extracted_at=now,
+        extractor_version="statute_kr@law.go.kr",
+        md_status="skipped",
+        md_extraction_error="statute: 텍스트 네이티브, markdown 변환 비대상",
+        source_channel="crawl",
+        data_origin="external",
+        review_status="approved",
+        ai_domain="법령",
+        ai_sub_group=act.title,
+        ai_tags=[f"법령/KR/{act.title}"],
+        # 안전 자료실 ingest 4축 (R8-M1 — classify-skip 경로라 ingest 시점 필수)
+        material_type="law",
+        jurisdiction=kr.JURISDICTION,
+        published_date=eff or prom,
+        extract_meta={
+            "statute": {"family_id": act.family_id, "law_id": act.native_id,
+                        "kind": payload.law_doc_kind, "version_key": payload.version_key,
+                        "annex_label": payload.annex_label,
+                        "event_kind": ev.kind, "revision_type": ev.revision_type},
+            "license": dict(_LICENSE),
+            **extra,
+        },
+    )
+    session.add(doc)
+    await session.flush()
+
+    session.add(LegalMeta(
+        document_id=doc.id,
+        family_id=act.family_id,
+        law_doc_kind=payload.law_doc_kind,
+        version_key=payload.version_key,
+        promulgation_date=prom,
+        effective_date=eff,
+        version_status="pending",   # 전 버전 pending 적재 — 승격은 생애주기 잡만
+    ))
+    # summarize 안 함 (조문 자체가 정본 — 맥미니 부하 0), embed+chunk 만
+    await enqueue_stage(session, doc.id, "embed")
+    await enqueue_stage(session, doc.id, "chunk")
+    return True
+
+
+# ─── 생애주기 잡 (전이·supersede·스윕·repeal 의 유일한 코드 지점) ────────────────
+
+async def _flip_chunks(session, doc_ids: list[int]) -> int:
+    if not doc_ids:
+        return 0
+    result = await session.execute(
+        update(DocumentChunk)
+        .where(DocumentChunk.doc_id.in_(doc_ids), DocumentChunk.in_corpus.is_(True))
+        .values(in_corpus=False)
+    )
+    return result.rowcount or 0
+
+
+async def _legacy_doc_ids(session, act: LegalAct) -> list[int]:
+    """법령명 매핑 레거시(law_monitor) 문서 id — 정규화 동등 비교 (prefix 금지)."""
+    result = await session.execute(
+        select(Document.id, Document.title).where(
+            Document.source_channel == "law_monitor",
+            Document.deleted_at.is_(None),
+        )
+    )
+    want = normalize_law_name(act.title)
+    ids = []
+    for doc_id, title in result.all():
+        name = legacy_law_name(title or "")
+        if name and normalize_law_name(name) == want:
+            ids.append(doc_id)
+    return ids
+
+
+async def run_lifecycle(session) -> dict:
+    """일 1회 생애주기 잡 — 호출측이 단일 트랜잭션 commit. KST 기준, 멱등."""
+    today = datetime.now(_KST).date()
+    stats = {"promoted": 0, "superseded": 0, "repealed": 0,
+             "legacy_flipped_docs": 0, "legacy_flipped_chunks": 0}
+
+    acts_result = await session.execute(select(LegalAct).where(LegalAct.watch.is_(True)))
+    acts = {a.family_id: a for a in acts_result.scalars().all()}
+
+    lm_result = await session.execute(
+        select(LegalMeta).where(LegalMeta.family_id.in_(list(acts.keys())))
+    )
+    metas = lm_result.scalars().all()
+
+    # 1) repeal — 마킹된 family: current+pending 전부 repealed + 청크 flip + 레거시 flip (R7-M2)
+    repeal_families = {fid for fid, a in acts.items() if a.repeal_detected_at is not None}
+    for fid in repeal_families:
+        rows = [m for m in metas if m.family_id == fid and m.version_status in ("pending", "current")]
+        for m in rows:
+            m.version_status = "repealed"
+            stats["repealed"] += 1
+        await _flip_chunks(session, [m.document_id for m in rows])
+        legacy_ids = await _legacy_doc_ids(session, acts[fid])
+        stats["legacy_flipped_chunks"] += await _flip_chunks(session, legacy_ids)
+
+    # 2) 승격 + supersede — 버전 시리즈 단위 (R7-B1 a: family 단위 금지)
+    series: dict[tuple, list[LegalMeta]] = {}
+    for m in metas:
+        if m.family_id in repeal_families:
+            continue
+        series.setdefault(
+            (m.family_id, m.law_doc_kind, series_suffix(m.version_key)), []
+        ).append(m)
+
+    for key, rows in series.items():
+        due = sorted(
+            (m for m in rows if m.version_status == "pending"
+             and (m.effective_date or m.promulgation_date)
+             and (m.effective_date or m.promulgation_date) <= today),
+            key=lambda m: (m.effective_date or m.promulgation_date),
+        )
+        for m in due:
+            prev = [c for c in rows if c.version_status == "current" and c is not m]
+            for c in prev:
+                c.version_status = "superseded"
+                stats["superseded"] += 1
+            await _flip_chunks(session, [c.document_id for c in prev])
+            m.version_status = "current"
+            stats["promoted"] += 1
+
+    # 3) 레거시 스윕 — 상태 기반 (R6-B1 a / R7-B1 b: primary 시리즈 current 보유 한정)
+    for fid, act in acts.items():
+        if fid in repeal_families:
+            continue
+        has_primary_current = any(
+            m.family_id == fid and m.law_doc_kind == "primary" and m.version_status == "current"
+            for m in metas
+        )
+        if not has_primary_current:
+            continue   # R3-B1 ② 내장 — fetch 실패 family 의 레거시 보존
+        legacy_ids = await _legacy_doc_ids(session, act)
+        flipped = await _flip_chunks(session, legacy_ids)
+        if flipped:
+            stats["legacy_flipped_docs"] += len(legacy_ids)
+            stats["legacy_flipped_chunks"] += flipped
+
+    return stats
+
+
+# ─── 메인 런 ─────────────────────────────────────────────────────────────────────
+
+async def run(bootstrap: bool = False) -> None:
+    """poll → fetch → ingest(가족 단위 커밋) → 생애주기 잡. 가족 단위 실패 격리."""
+    async with async_session() as session:
+        result = await session.execute(
+            select(LegalAct).where(LegalAct.watch.is_(True)).order_by(LegalAct.family_id)
+        )
+        rows = result.scalars().all()
+        if not rows:
+            logger.warning("[statute] 워치리스트 비어 있음 — 시드(migration 356) 미적용?")
+            return
+        source = await _get_source(session)
+        await session.commit()
+        source_id = source.id
+
+    ingested = 0
+    failed = 0
+    by_jur: dict[str, list] = {}
+    for row in rows:
+        by_jur.setdefault(row.jurisdiction, []).append(row)
+
+    async with httpx.AsyncClient(timeout=60) as client:
+        for jur, acts in by_jur.items():
+            adapter = _ADAPTERS.get(jur)
+            if adapter is None:
+                logger.warning(f"[statute] 어댑터 없는 jurisdiction skip: {jur}")
+                continue
+            assert adapter.JURISDICTION == jur, \
+                f"어댑터/행 jurisdiction 불일치: {adapter.JURISDICTION} != {jur}"
+
+            events = await adapter.poll_changes(client, acts)
+            acts_by_id = {a.family_id: a for a in acts}
+            for ev in events:
+                if bootstrap:
+                    ev.kind = "bootstrap"   # 합성 이벤트 — amend 와 동일 경로 (R6-m2)
+                act_ref = acts_by_id[ev.family_id]
+                try:
+                    payloads = await adapter.fetch_version(client, act_ref, ev)
+                    async with async_session() as session:
+                        act = await session.get(LegalAct, ev.family_id)
+                        new_docs = 0
+                        for p in payloads:
+                            if await _ingest_payload(session, act, ev, p, backfill=bootstrap):
+                                new_docs += 1
+                        # 워터마크 영속 = 파싱 검증(payload floor) 통과 후에만
+                        act.watermark = ev.new_version_key
+                        if ev.kind == "repeal":
+                            act.repeal_detected_at = datetime.now(timezone.utc)
+                        await session.commit()
+                    ingested += new_docs
+                    logger.info(f"[statute] ingest {ev.family_id} ({ev.kind}): "
+                                f"payload {len(payloads)}건 중 신규 {new_docs}건")
+                except Exception as e:
+                    failed += 1
+                    logger.error(f"[statute] ingest 실패 ({ev.family_id}): "
+                                 f"{type(e).__name__}: {e!r} — 워터마크 미영속, 다음 폴링 재감지")
+                await asyncio.sleep(_FETCH_DELAY_S)
+
+    # 생애주기 잡 — 수집 사이클 직후, 단일 트랜잭션 (0-2 ②)
+    async with async_session() as session:
+        stats = await run_lifecycle(session)
+        await session.commit()
+    logger.info(f"[statute] lifecycle: {stats}")
+
+    # health — fail-loud 가시성 (HC.io 폐기로 보드/health 행이 1차 관측면)
+    async with async_session() as session:
+        h = await _get_or_create_health(session, source_id)
+        now = datetime.now(timezone.utc)
+        if failed:
+            _record_failure(h, f"ingest 실패 {failed}건", now)
+        else:
+            _record_success(h, ingested, False, now)
+        await session.commit()
+
+    logger.info(f"[statute] run 완료 — 신규 문서 {ingested}건, 실패 {failed}건"
+                + (" (bootstrap)" if bootstrap else ""))
+
+
+async def _get_source(session) -> NewsSource:
+    result = await session.execute(select(NewsSource).where(NewsSource.name == _SOURCE_NAME))
+    source = result.scalars().first()
+    if source is None:
+        source = NewsSource(
+            name=_SOURCE_NAME, feed_url=kr.LAW_SEARCH_URL, feed_type="rss",
+            fetch_method="api", fulltext_policy="none", source_channel="crawl",
+            category="Safety", language="ko", country="KR",
+            enabled=False,   # 6h 뉴스 사이클 비대상 — 본 워커가 daily 폴링
+        )
+        session.add(source)
+        await session.flush()
+    return source
+
+
+async def poll_once() -> int:
+    """관찰 전용 폴링 (PR① 잔존 CLI — 상태 변경 0)."""
+    async with async_session() as session:
+        result = await session.execute(
+            select(LegalAct).where(LegalAct.watch.is_(True)).order_by(LegalAct.family_id)
+        )
+        rows = result.scalars().all()
+    total = 0
+    async with httpx.AsyncClient(timeout=30) as client:
+        events = await kr.poll_changes(client, [r for r in rows if r.jurisdiction == "KR"])
+        for ev in events:
+            logger.info(f"[statute] 변경 감지 ({ev.kind}): {ev.family_id} {ev.title} "
+                        f"MST={ev.new_version_key}")
+        total = len(events)
+    logger.info(f"[statute] poll 완료 — 변경 {total}건 (관찰 전용)")
+    return total
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bootstrap", action="store_true",
+                        help="26 family 현행판 1회 부트스트랩 (backfill 마커, R4-M1)")
+    parser.add_argument("--poll-only", action="store_true", help="관찰 전용 폴링")
+    args = parser.parse_args()
+    if args.poll_only:
+        asyncio.run(poll_once())
+    else:
+        asyncio.run(run(bootstrap=args.bootstrap))
@@ -14,6 +14,7 @@ from datetime import datetime, timedelta, timezone
 from sqlalchemy import select, update
 from sqlalchemy.exc import SQLAlchemyError

+from core.config import settings
 from core.database import async_session
 from core.utils import setup_logger
 from models.study_memo_card_job import StudyMemoCardJob
@@ -50,6 +51,10 @@ async def reset_stale_card_jobs() -> None:

 async def consume_study_memo_card_queue() -> None:
    """APScheduler 진입점. pending card_extract job 을 BATCH_SIZE 만큼 처리."""
+    # 생성 LLM 홀드: claim 자체를 하지 않음 (1분 주기라 로그는 debug).
+    if "study_memo_card" in settings.pipeline_held_stages:
+        logger.debug("study_memo_card 보류 (pipeline.held_stages)")
+        return
    await reset_stale_card_jobs()

    async with async_session() as session:
@@ -59,6 +59,11 @@ async def reset_stale_study_jobs() -> None:

 async def consume_study_queue() -> None:
    """APScheduler 진입점. pending job BATCH_SIZE 만큼 처리."""
+    # 생성 LLM 홀드: env(study_explanation_enabled) 와 별개의 self-contained 게이트.
+    # pending 은 그대로 유지 (Mac mini derived-worker 흡수 경로도 본 게이트와 무관).
+    if "study_explanation" in settings.pipeline_held_stages:
+        logger.debug("study_explanation 보류 (pipeline.held_stages)")
+        return
    await reset_stale_study_jobs()

    async with async_session() as session:
@@ -12,6 +12,7 @@ from datetime import datetime, timedelta, timezone
 from sqlalchemy import select, update
 from sqlalchemy.exc import SQLAlchemyError

+from core.config import settings
 from core.database import async_session
 from core.utils import setup_logger
 from models.study_quiz_session_job import StudyQuizSessionJob
@@ -48,6 +49,10 @@ async def reset_stale_session_jobs() -> None:

 async def consume_study_session_queue() -> None:
    """APScheduler 진입점. pending session_jobs 를 BATCH_SIZE 만큼 처리."""
+    # 생성 LLM 홀드: claim 자체를 하지 않음 (1분 주기라 로그는 debug).
+    if "study_session_analysis" in settings.pipeline_held_stages:
+        logger.debug("study_session_analysis 보류 (pipeline.held_stages)")
+        return
    await reset_stale_session_jobs()

    async with async_session() as session:
@@ -2,27 +2,37 @@

 P3 of family-adaptive-bengio (2026-05-23): 50k 초과 input 은 sliding window
 (cumulative carry-over) 로 분할 처리. 50k 이하 input 은 기존 동작 유지.
+
+ds-macbook-offload-1: use_deep=True (queue_drain 전용) 시 맥북 M5 Max deep 슬롯으로
+호출 — 맥미니 백로그를 사용자가 의도적으로 분담시키는 수동 레버. 기본(consumer) 경로는
+use_deep=False 로 기존 동작 그대로. 맥북 불가 시 StageDeferred (강등 0, 부분 쓰기 0).
 """

 from datetime import datetime, timezone

 from sqlalchemy.ext.asyncio import AsyncSession

-from ai.client import AIClient, strip_thinking
+from ai.client import AIClient, call_deep_or_defer, strip_thinking
 from core.utils import setup_logger
 from models.document import Document

 logger = setup_logger("summarize_worker")

 CHUNK_SIZE = 50000
+# client.summarize() 의 단일 프롬프트와 동일 문구 — deep 경로가 같은 과업을 수행하도록 고정
+SUMMARY_PROMPT_SINGLE = "다음 문서를 500자 이내로 요약해주세요:\n\n{text}"
 SUMMARY_PROMPT_CONTINUATION = (
    "이전 부분 요약:\n{prior}\n\n다음 부분:\n{text}\n\n"
    "위 두 정보를 합쳐 전체 문서를 500자 이내로 요약해주세요."
 )


-async def process(document_id: int, session: AsyncSession) -> None:
-    """문서 AI 요약 생성 (분류 없이 요약만)"""
+async def process(document_id: int, session: AsyncSession, *, use_deep: bool = False) -> None:
+    """문서 AI 요약 생성 (분류 없이 요약만).
+
+    use_deep: queue_drain 전용 — deep 슬롯(맥북) 경유. 슬롯 미구성 시 명시 에러
+    (silent 강등 금지). consumer 기본 경로는 False (기존 동작 무변경).
+    """
    doc = await session.get(Document, document_id)
    if not doc:
        raise ValueError(f"문서 ID {document_id}를 찾을 수 없음")
@@ -35,13 +45,29 @@ async def process(document_id: int, session: AsyncSession) -> None:
        return

    client = AIClient()
+    if use_deep and client.ai.deep is None:
+        await client.close()
+        raise ValueError("use_deep=True 인데 config.yaml ai.models.deep 슬롯 미구성 — silent 강등 금지")
+    used_cfg = client.ai.deep if use_deep else client.ai.primary
+
+    async def _summarize_first(text_part: str) -> str:
+        if use_deep:
+            return await call_deep_or_defer(client, SUMMARY_PROMPT_SINGLE.format(text=text_part))
+        return await client.summarize(text_part)
+
+    async def _summarize_continuation(prompt: str) -> str:
+        if use_deep:
+            return await call_deep_or_defer(client, prompt)
+        return await client.call_primary(prompt)
+
    try:
        text = doc.extracted_text
        total_chars = len(text)
        if total_chars <= CHUNK_SIZE:
-            summary = await client.summarize(text)
+            summary = await _summarize_first(text)
            logger.info(
                f"[요약] document_id={document_id}: single chunk ({total_chars}자)"
+                + (" via deep(맥북)" if use_deep else "")
            )
        else:
            chunks = [text[i:i + CHUNK_SIZE] for i in range(0, total_chars, CHUNK_SIZE)]
@@ -52,10 +78,10 @@ async def process(document_id: int, session: AsyncSession) -> None:
            carry = ""
            for idx, chunk in enumerate(chunks):
                if idx == 0:
-                    partial = await client.summarize(chunk)
+                    partial = await _summarize_first(chunk)
                else:
                    prompt = SUMMARY_PROMPT_CONTINUATION.format(prior=carry, text=chunk)
-                    partial = await client.call_primary(prompt)
+                    partial = await _summarize_continuation(prompt)
                carry = strip_thinking(partial)
                logger.info(
                    f"[요약] document_id={document_id}: chunk {idx + 1}/{len(chunks)} done "
@@ -63,8 +89,10 @@ async def process(document_id: int, session: AsyncSession) -> None:
                )
            summary = carry

+        # sleep-안전 불변식: 쓰기는 전체 완주 후에만 — 중간 절단은 StageDeferred 로 빠져
+        # 이 지점에 도달하지 않는다 (carry 는 로컬 변수, doc 무변경).
        doc.ai_summary = strip_thinking(summary)
-        doc.ai_model_version = client.ai.primary.model
+        doc.ai_model_version = used_cfg.model
        doc.ai_processed_at = datetime.now(timezone.utc)
        logger.info(
            f"[요약] document_id={document_id}: {len(doc.ai_summary)}자 final"
@@ -6,25 +6,40 @@ ai:

  models:
    # ─── 단일 generation 호스트 routing (2026-05-14 GPU LLM 제거) ───
-    # GPU Ollama gemma4:e4b-it-q8_0 제거. Mac mini 26B-A4B 가 triage + primary + classifier 모두 흡수.
-    # fallback 은 Claude Sonnet 4 API (Mac mini 다운 시 자동 trigger, premium 과 budget 공유).
-    # plan: ~/.claude/plans/rosy-launching-otter.md §C/§D/§E
+    # 2026-06-11 B안: 맥미니 모델 = Gemma 26B-A4B → Qwen3.6-27B-6bit 풀교체 (사용자 결정).
+    # dense 27B 라 디코드 ~13 tok/s 급 (a4b ~42 대비 감속) → timeout 상향 (triage 30→480, primary 180→900).
+    # fallback 은 Claude Sonnet 4 API (CLAUDE_API_KEY 미주입 = 비활성).
+    # plan: ~/.claude/plans/rosy-launching-otter.md §C/§D/§E + project_macmini_model_decision

-    # triage: 상시 분류·요약·근거 선별. Mac mini 26B (primary 와 동일 endpoint, 짧은 max_tokens).
+    # triage: 상시 분류·요약·근거 선별. Mac mini Qwen 27B (primary 와 동일 endpoint, 짧은 max_tokens).
    triage:
      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
-      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
+      model: "mlx-community/Qwen3.6-27B-6bit"
      max_tokens: 4096
-      timeout: 30
+      timeout: 480  # 프리필 실측 ~112 tok/s — 120K자 장문 커버 (2026-06-11)
      context_char_limit: 120000
      temperature: 0.0

-    # primary: 에스컬레이션 전용. 26B MLX (맥미니 Semaphore(1) 보호 대상).
+    # primary: 에스컬레이션 전용. Qwen 27B MLX (맥미니 Semaphore(1) 보호 대상).
    primary:
      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
-      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
+      model: "mlx-community/Qwen3.6-27B-6bit"
      max_tokens: 8192
-      timeout: 180
+      timeout: 900  # 프리필 실측 ~112 tok/s — 260K자 상한 장문 커버 (2026-06-11)
+      context_char_limit: 260000
+      temperature: 0.3
+      top_p: 0.9
+
+    # deep: 야간 night-drain 전용 — 맥북 M5 Max Qwen3.6-27B-6bit (llm-router :8890 경유,
+    # model=qwen-macbook alias). 2026-06-11 재도입 (사용자: 자기 전 night-drain 으로 백로그 분담).
+    # 맥북 불가(503/연결/절단) = StageDeferred 보류 — 맥미니/cloud 강등 없음, attempts 미소모.
+    # consumer 의 deep_summary 도 슬롯 존재 시 맥북 경유 (잠들어 있으면 30분 백오프 보류 = 무해).
+    # 슬롯 제거 시 deep_summary 는 primary(맥미니) 경로 복귀.
+    deep:
+      endpoint: "http://100.76.254.116:8890/v1/chat/completions"
+      model: "qwen-macbook"
+      max_tokens: 8192
+      timeout: 900
      context_char_limit: 260000
      temperature: 0.3
      top_p: 0.9
@@ -58,9 +73,9 @@ ai:
    # classifier_service 가 hasattr 체크로 optional 이므로 이 섹션 제거 시 classifier gate 는 자동 skip (score-only).
    classifier:
      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
-      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
+      model: "mlx-community/Qwen3.6-27B-6bit"  # 2026-06-11 B안 동승 — gemma id 잔존 시 mlx 서버가 Gemma 를 재로드(이중 적재) 위험
      max_tokens: 512
-      timeout: 30  # 2026-05-17: 15s 도 동시 부하 시 elapsed 14.4s 직전이라 tight — 30s 로 2x 마진 (Mac mini 26B concurrent load). classifier_service.LLM_TIMEOUT_MS=30000 와 align
+      timeout: 30  # 2026-05-17: 15s 도 동시 부하 시 elapsed 14.4s 직전이라 tight — 30s 로 2x 마진. classifier_service.LLM_TIMEOUT_MS=30000 와 align (초과 = score-only skip, graceful)
    # 제거: vision (미사용)

  # ─── deep_summary enqueue 폭발 억제 (B-1 R2) ───
@@ -84,7 +99,7 @@ search:
      macbook_url: "http://100.118.112.84:8810"  # MacBook M5 Max Tailscale interface bind
      macbook_model: "mlx-community/Qwen3.6-27B-8bit"
      timeout_connect_s: 1   # MacBook sleep/wake 빠른 감지 (자동 fallback 부재 → 빠른 503)
-      timeout_read_s: 30     # synthesis_service.LLM_TIMEOUT_MS=30000 와 align
+      timeout_read_s: 120    # 2026-06-11 Qwen 27B(디코드 ~11.7 tok/s) — synthesis_service.LLM_TIMEOUT_MS=120000 와 align
    # PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react ReAct loop (qwen-macbook only)
    react:
      enabled: true
@@ -176,3 +191,16 @@ schedule:
  daily_digest: "20:00"
  file_watcher_interval_minutes: 5
  queue_consumer_interval_minutes: 10
+
+# 생성 LLM 홀드 게이트 (2026-06-11 신설): held_stages 에 든 이름의 컨슈머/워커는 claim 자체를
+# 하지 않는다 (attempts 미소모, pending 적체). 유효 키 8 = classify/summarize/deep_summary(큐) +
+# digest/briefing(cron) + study_explanation/study_session_analysis/study_memo_card(컨슈머).
+# 그 외 문자열은 무동작(오타 주의). 적용/해제 = 리스트 수정 후 fastapi 재기동.
+# 이력: 2026-06-11 맥미니 모델 확정까지 8키 홀드 → 同日 Qwen3.6-27B-6bit 전환과 함께 해제([]).
+pipeline:
+  held_stages: []
+  # mlx gate 동시 실행 상한 (2026-06-12 fair-share): 구 "1 고정" 룰의 전제(single-inference
+  # 서버)가 소멸 — 현 mlx_vlm 은 continuous batching (2026-06-11 밤 6~8 concurrent 실측 정상).
+  # 2 = 워커 LLM 호출과 인터랙티브(ask/eid)가 서로 안 막힘 + 집계 throughput ~1.8배.
+  # 게이트(상한+우선순위)는 유지 — thundering herd 방지. 1 로 되돌리면 구 동작.
+  mlx_gate_concurrency: 2
@@ -64,6 +64,11 @@ services:
    environment:
      - HF_HOME=/models/huggingface
      - TORCH_HOME=/models/torch
+      # D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~3.5GB) 해제가 90% 봉투의 전제.
+      # /ready 는 idle 에서도 200 (fastapi depends_on service_healthy 유지).
+      # 롤백 = MARKER_PRELOAD=1 + MARKER_IDLE_UNLOAD_MINUTES=0.
+      - MARKER_PRELOAD=0
+      - MARKER_IDLE_UNLOAD_MINUTES=${MARKER_IDLE_UNLOAD_MINUTES:-30}
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
      - marker_models:/models
@@ -97,6 +102,11 @@ services:
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      - WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
      - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
+      # D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~4GB) 해제가 90% 봉투의 전제.
+      # 콜드로드 수초~수십 초는 배치 작업이라 무방 (stt_worker read=1800s 가 흡수).
+      # 롤백 = STT_PRELOAD=1 + STT_IDLE_UNLOAD_MINUTES=0.
+      - STT_PRELOAD=0
+      - STT_IDLE_UNLOAD_MINUTES=${STT_IDLE_UNLOAD_MINUTES:-30}
    deploy:
      resources:
        reservations:
@@ -105,9 +115,9 @@ services:
              count: 1
              capabilities: [gpu]
    healthcheck:
-      # /ready: CUDA 디바이스 + 모델 적재 둘 다 확인. ready=true 만 healthy 처리.
-      # /health 는 단순 liveness 라 모델 미적재 상태도 healthy 로 잡혀 운영 신호로 부적합.
-      test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
+      # D-1: idle-unload 도입으로 '모델 적재' 는 더 이상 상시 상태가 아님 — cuda 가용성만
+      # healthy 기준. 모델 적재 여부는 /ready 의 models_loaded 필드로 관측(정보성).
+      test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('cuda') else 1)"]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -229,6 +239,31 @@ services:
      - fastapi
    restart: unless-stopped

+  # crawl-24x7 A-8 1차: 전 소스 헬스 패널 — 내부 전용 (읽기 전용 SELECT 만).
+  # '내부 전용' 성립 구현 = 별도 바인딩뿐 (r4 결정): Tailscale 인터페이스에만 publish.
+  # 기존 SvelteKit 라우트(vhost=Host 헤더 검사=앱 가드 환원)나 프록시 경로 차단(경로 가드
+  # 회귀)으로 옮기지 말 것. caddy/home-caddy 라우트 추가 금지. fastapi/postgres 바인딩 선례.
+  crawl-health:
+    build: ./services/crawl-health
+    ports:
+      - "100.110.63.63:8765:8765"
+    environment:
+      - CRAWL_HEALTH_DSN=postgresql://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
+    depends_on:
+      postgres:
+        condition: service_healthy
+    restart: unless-stopped
+
+  # crawl-24x7 B-3: 구독 세션 Playwright fetch 격리 — internal-only (host 포트·caddy 라우트 금지).
+  # 브라우저 hang/크래시가 fastapi APScheduler 를 잠식하지 않게 별도 컨테이너 + mem cap.
+  # 세션 파일(쿠키=credential 등가물)은 repo 밖 호스트 경로 ro mount (600, gitignore 무관 영역).
+  playwright-fetcher:
+    build: ./services/playwright-fetcher
+    volumes:
+      - /home/hyungi/.local/share/crawl-auth:/auth:ro
+    mem_limit: 2g
+    restart: unless-stopped
+
  caddy:
    image: caddy:2
    ports:
@@ -172,6 +172,61 @@ export async function api<T = unknown>(
  return res.json();
 }

+/**
+ * Raw fetch helper — for endpoints (SSE / streaming) where parsing the whole
+ * body as JSON in one go is unsuitable.
+ *
+ * Documented as sharing the same policy as api<T>():
+ * - attaches the access token automatically
+ * - 401 → one retry after a token refresh (on failure handleTokenRefresh forces a logout)
+ * - sets Content-Type to application/json for any body that is not FormData
+ *
+ * Difference: the Response is returned as-is (checking status and consuming the
+ * body are the caller's responsibility).
+ * PR-Eid-Chat: the `/api/eid/chat` SSE stream is the first consumer. Additive export
+ * only — the behaviour of the existing api()/uploadFile() is not changed.
+ */
+export async function apiFetchRaw(
+  path: string,
+  options: RequestInit = {},
+): Promise<Response> {
+  const headers: Record<string, string> = {
+    ...(options.headers as Record<string, string> || {}),
+  };
+
+  if (accessToken) {
+    headers['Authorization'] = `Bearer ${accessToken}`;
+  }
+  if (options.body && !(options.body instanceof FormData)) {
+    headers['Content-Type'] = 'application/json';
+  }
+
+  const res = await fetch(`${API_BASE}${path}`, {
+    ...options,
+    headers,
+    credentials: 'include',
+  });
+
+  // 401 → try one refresh (same policy as api(); the login/refresh endpoints are excluded)
+  const isAuthEndpoint = path.startsWith('/auth/login') || path.startsWith('/auth/refresh');
+  if (res.status === 401 && accessToken && !isAuthEndpoint) {
+    try {
+      await handleTokenRefresh();
+    } catch {
+      // Refresh failed — handleTokenRefresh takes care of the forced logout (redirect).
+      // Consistent with api(), return the original 401 Response unchanged so the caller
+      // does not mistake it for a network error (its body is unconsumed, so still usable).
+      return res;
+    }
+    // Retry once with the refreshed token; this second Response is returned whatever its status.
+    headers['Authorization'] = `Bearer ${accessToken}`;
+    return fetch(`${API_BASE}${path}`, {
+      ...options,
+      headers,
+      credentials: 'include',
+    });
+  }
+
+  return res;
+}
+
 /**
 * 업로드 전용 헬퍼 — XMLHttpRequest 기반.
 *
@@ -10,7 +10,7 @@
  import SectionOutline from '$lib/components/SectionOutline.svelte';
  import { getViewerType } from '$lib/utils/viewerType';
  import { isMdSuccess } from '$lib/utils/mdStatus';
-  import { buildAnchorMap } from '$lib/utils/outlineAnchors';
+  import { resolveAnchorMap } from '$lib/utils/resolveAnchorMap';
  import { cleanHeading } from '$lib/utils/headingPath';

  // 편집 미리보기 전용 plain marked (본문 렌더는 MarkdownDoc 가 담당).
@@ -109,7 +109,7 @@
      (s) => !!(cleanHeading(s.section_title) || cleanHeading((s.heading_path || '').split('>').pop() || '')),
    ),
  );
-  // MarkdownDoc 가 실제 렌더하는 텍스트(anchor offset 기준과 일치해야 함).
+  // MarkdownDoc 가 실제 렌더하는 텍스트(rail 표시 게이트용).
  let mdRenderText = $derived.by(() => {
    if (!fullDoc) return '';
    if (viewerType === 'pdf') return pdfViewMode === 'markdown' && canShowMarkdown ? (fullDoc.md_content || '') : '';
@@ -117,7 +117,26 @@
    if (viewerType === 'hwp-markdown' || viewerType === 'article') return fullDoc.md_content || fullDoc.extracted_text || '';
    return '';
  });
-  let anchorMap = $derived(sections.length && mdRenderText ? buildAnchorMap(mdRenderText, sections).anchors : {});
+  // [g5-t3] The anchor basis is per RENDER SITE, so the anchor map is split per basis.
+  // If one component shared both bases (md_content vs extracted_text), trustBE would be wrong
+  // for one of them.
+  //   - md_content site (pdf-markdown): trustBE=true (BE char_start first; when it is empty the
+  //     resolver falls back to its internal string match).
+  //   - extracted_text site (3-pane markdown): trustBE=false (char_start is an md_content offset,
+  //     hence invalid for this text → always fall back).
+  // Basis text for the md_content site: non-empty only for the pdf viewer in markdown view mode
+  // when markdown can be shown.
+  let mdBasisText = $derived.by(() => {
+    if (!fullDoc) return '';
+    if (viewerType === 'pdf') return pdfViewMode === 'markdown' && canShowMarkdown ? (fullDoc.md_content || '') : '';
+    return '';
+  });
+  // Basis text for the extracted_text site: non-empty only for the 'markdown' viewer type.
+  let extractedBasisText = $derived.by(() => {
+    if (!fullDoc) return '';
+    if (viewerType === 'markdown') return fullDoc.extracted_text || rawMarkdown || '';
+    return '';
+  });
+  // Anchor maps stay {} until there are sections AND the matching basis text is non-empty.
+  let anchorMapMd = $derived(
+    sections.length && mdBasisText ? resolveAnchorMap(mdBasisText, sections, { trustBE: true }).anchors : {},
+  );
+  let anchorMapExtracted = $derived(
+    sections.length && extractedBasisText ? resolveAnchorMap(extractedBasisText, sections, { trustBE: false }).anchors : {},
+  );
  let showRail = $derived(outlineSections.length > 0 && !!mdRenderText);

  let scrollEl = $state();
@@ -128,7 +147,8 @@
  }
  // scroll-spy: scrollEl 내 .md-anchor 중 컨테이너 상단(+120) 지난 마지막 = 현재 절.
  $effect(() => {
-    void anchorMap;
+    void anchorMapMd;
+    void anchorMapExtracted;
    const el = scrollEl;
    if (!el) return;
    let raf = 0;
@@ -255,7 +275,7 @@
              mdStatus={fullDoc.md_status}
              mdExtractionError={fullDoc.md_extraction_error}
              mdExtractionQuality={fullDoc.md_extraction_quality}
-              anchorMap={anchorMap}
+              anchorMap={anchorMapExtracted}
              extractedText={fullDoc.extracted_text || rawMarkdown}
              class={PROSE}
            />
@@ -280,7 +300,7 @@
              mdStatus={fullDoc.md_status}
              mdExtractionError={fullDoc.md_extraction_error}
              mdExtractionQuality={fullDoc.md_extraction_quality}
-              anchorMap={anchorMap}
+              anchorMap={anchorMapMd}
              extractedText={fullDoc.extracted_text}
              class={PROSE}
            />
@@ -50,7 +50,9 @@
  }: Props = $props();

  // 개요 anchor 주입: body 의 각 offset(내림차순)에 빈 <span id="sec-N"> 삽입(점프 타깃).
-  // offset 은 buildAnchorMap 이 body 와 동일 문자열 기준으로 산출했어야 함(호출측 책임).
+  // [C3 불변식] char_start(BE) 는 호출측이 넘긴 md_content(raw, untransformed)에 대한 UTF-16 offset 이다.
+  //   이 함수는 그 동일 문자열을 'out' 으로 받아 trim/CRLF-normalize/replace 없이 slice 해야 한다 —
+  //   prop→out 사이 어떤 변환도 char_start 를 drift 시킨다. (현재 out = text(=body=mdContent prop) 무변환.)
  function spliceAnchors(text: string, map: Record<number, number> | null): string {
    if (!map) return text;
    const ents = Object.entries(map)
@@ -0,0 +1,527 @@
+<script lang="ts">
+  // 처리 머신 보드 v3 — 통합안 (plan ds-board-merged: C2 머신레인 + C3 번다운/정직ETA).
+  //   · 머신 3레인(GPU/맥미니/맥북) = "누가 일하나" + 요약 오프로드(맥북 합류) 가시화
+  //   · 지배 백로그 번다운 패널 = "언제 끝나나" + 유입 차감한 정직 ETA(summarize_eta)
+  //   · 신선도 '갱신 N초 전' + stale 경고 / 실패 드로어·상세 패널은 v2 자산 재사용.
+  // 데이터 = GET /api/queue/overview (60s 폴링 store) + GET /api/queue/failed (드로어).
+  import { api } from '$lib/api';
+  import { refreshQueueOverview, queueUpdatedAt } from '$lib/stores/queueOverview';
+  import { addToast } from '$lib/stores/toast';
+  import {
+    AUX_NODES,
+    FLOW_NODES,
+    MACHINE_META,
+    type FlowNodeDef,
+    type FlowMachine,
+    etaShort,
+    flowStageLabel,
+    formatAgeSec,
+    formatRate,
+  } from '$lib/utils/queueDisplay';
+  import type {
+    FailedItem,
+    FailedListResponse,
+    MachineCurrentItem,
+    MachineOverview,
+    QueueOverview,
+    QueueStageRow,
+    RetryResponse,
+    SkipResponse,
+  } from '$lib/types/queue';
+
+  let { overview }: { overview: QueueOverview } = $props();
+
+  // ─── 노드 통계 합성 ───
+  interface NodeStats {
+    def: FlowNodeDef;
+    /** 다중 stage 노드(청크·임베딩)는 같은 문서가 양쪽 큐에 있어 max — 합산 = 이중계산 */
+    pending: number;
+    processing: number;
+    failed: number; // 실패는 행 단위 사실이라 합산
+    done1h: number;
+    created1h: number;
+    doneToday: number;
+    oldestAgeSec: number | null;
+    etaMinutes: number | null;
+    inflowDominant: boolean;
+    perStage: QueueStageRow[];
+  }
+
+  const stageBy = $derived(new Map(overview.stages.map((s) => [s.stage, s])));
+
+  /**
+   * Fold the queue rows behind one flow node into a single NodeStats record.
+   *
+   * Rows are looked up in `stageBy` for every stage listed on `def`; stages without a
+   * row are dropped. pending / done_1h / created_1h / done_today take the MAX across
+   * rows (a multi-stage node can hold the same document in several queues, so summing
+   * would double count), while processing and failed are summed.
+   *
+   * etaMinutes uses the NET drain rate (done_1h − created_1h) — the same basis as
+   * netEtaLabel — so the detail panel and the burndown footer agree. It is null when
+   * nothing is pending or when the backlog is not shrinking (net rate ≤ 0).
+   *
+   * @param def flow node definition (label, machine, member stages)
+   * @returns aggregated statistics for that node
+   */
+  function nodeStats(def: FlowNodeDef): NodeStats {
+    const rows = def.stages
+      .map((s) => stageBy.get(s))
+      .filter((r): r is QueueStageRow => r != null);
+    const maxOf = (pick: (r: QueueStageRow) => number) =>
+      rows.reduce((m, r) => Math.max(m, pick(r)), 0);
+    const sumOf = (pick: (r: QueueStageRow) => number) =>
+      rows.reduce((s, r) => s + pick(r), 0);
+
+    const pending = maxOf((r) => r.pending);
+    const done1h = maxOf((r) => r.done_1h);
+    const created1h = maxOf((r) => r.created_1h);
+    // Oldest waiting age across rows; stays null when no row reports one.
+    const oldest = rows.reduce<number | null>(
+      (m, r) => (r.oldest_pending_age_sec == null ? m : Math.max(m ?? 0, r.oldest_pending_age_sec)),
+      null,
+    );
+    // Net drain per hour: what is actually removed from the backlog after inflow.
+    const netRate1h = done1h - created1h;
+    return {
+      def,
+      pending,
+      processing: sumOf((r) => r.processing),
+      failed: sumOf((r) => r.failed),
+      done1h,
+      created1h,
+      doneToday: maxOf((r) => r.done_today),
+      oldestAgeSec: oldest,
+      etaMinutes: pending > 0 && netRate1h > 0 ? Math.round((pending / netRate1h) * 60) : null,
+      inflowDominant: pending > 0 && created1h > done1h,
+      perStage: rows,
+    };
+  }
+
+  const mainNodes = $derived(FLOW_NODES.map(nodeStats));
+  const auxAll = $derived(AUX_NODES.map(nodeStats));
+  const auxActive = $derived(
+    auxAll.filter((n) => n.pending + n.processing + n.failed + n.doneToday > 0),
+  );
+  const auxIdle = $derived(
+    auxAll.filter((n) => n.pending + n.processing + n.failed + n.doneToday === 0),
+  );
+  const totalFailed = $derived(overview.totals.failed);
+
+  // ─── 선택 상태 (노드 상세 / 실패 드로어 — 동시에 하나만) ───
+  let selected = $state<string | null>(null);
+  let failOpen = $state(false);
+
+  /**
+   * Toggle the detail panel for a node: clicking the selected node deselects it,
+   * clicking another node selects it and closes the failure drawer.
+   */
+  function toggleNode(key: string) {
+    if (selected === key) {
+      selected = null;
+      return;
+    }
+    selected = key;
+    // Detail panel and failure drawer are mutually exclusive.
+    if (key) failOpen = false;
+  }
+
+  const selectedNode = $derived(
+    [...mainNodes, ...auxAll].find((n) => n.def.key === selected) ?? null,
+  );
+
+  /**
+   * Collect the in-flight items, across all machines, whose stage belongs to `def`.
+   * Order follows overview.machines, then each machine's `current` list.
+   */
+  function nodeCurrent(def: FlowNodeDef): MachineCurrentItem[] {
+    const hits: MachineCurrentItem[] = [];
+    for (const machine of overview.machines) {
+      for (const item of machine.current) {
+        if (def.stages.includes(item.stage)) hits.push(item);
+      }
+    }
+    return hits;
+  }
+
+  // ─── 실패 드로어 ───
+  let failItems = $state<FailedItem[]>([]);
+  let failLoading = $state(false);
+  let busy = $state(false);
+  let expanded = $state<Record<string, boolean>>({});
+
+  /** Open the failure drawer (closing any node detail panel) and load its list. */
+  async function openFailures() {
+    selected = null;
+    failOpen = true;
+    await loadFailures();
+  }
+
+  /**
+   * Fetch the failed-item list from /queue/failed into `failItems`.
+   * A failed request only raises an error toast; `failLoading` is cleared either way.
+   */
+  async function loadFailures() {
+    failLoading = true;
+    try {
+      const { items } = await api<FailedListResponse>('/queue/failed');
+      failItems = items;
+    } catch {
+      addToast('error', '실패 목록을 불러오지 못했습니다');
+    } finally {
+      failLoading = false;
+    }
+  }
+
+  interface FailGroup {
+    key: string;
+    stage: string;
+    pattern: string;
+    items: FailedItem[];
+  }
+
+  // Grouping key = stage + first 36 chars of the error message, so items that share a
+  // cause (ReadTimeout etc.) land in one group. Groups are ordered by stage, then by
+  // size (largest first).
+  const failGroups = $derived.by(() => {
+    const byKey = new Map<string, FailGroup>();
+    failItems.forEach((item) => {
+      const pattern = (item.error_message ?? '(메시지 없음)').slice(0, 36);
+      const key = `${item.stage}::${pattern}`;
+      if (!byKey.has(key)) byKey.set(key, { key, stage: item.stage, pattern, items: [] });
+      byKey.get(key)!.items.push(item);
+    });
+    const groups = Array.from(byKey.values());
+    groups.sort((a, b) => {
+      const byStage = a.stage.localeCompare(b.stage);
+      return byStage !== 0 ? byStage : b.items.length - a.items.length;
+    });
+    return groups;
+  });
+
+  /**
+   * POST the given queue ids to /queue/retry, toast the outcome, then reload the
+   * failure list and overview. No-op while another action is running or for an
+   * empty id list.
+   */
+  async function retryIds(ids: number[]) {
+    if (busy) return;
+    if (ids.length === 0) return;
+    busy = true;
+    try {
+      const { retried, not_retried } = await api<RetryResponse>('/queue/retry', {
+        method: 'POST',
+        body: JSON.stringify({ ids }),
+      });
+      const excluded = not_retried > 0 ? ` (${not_retried}건 제외 — 이미 활성/처리됨)` : '';
+      addToast('success', `재시도 ${retried}건 큐 재진입${excluded}`);
+      await afterAction();
+    } catch {
+      addToast('error', '재시도 요청 실패');
+    } finally {
+      busy = false;
+    }
+  }
+
+  /**
+   * POST the given queue ids to /queue/skip, toast the outcome, then reload the
+   * failure list and overview. No-op while another action is running or for an
+   * empty id list.
+   */
+  async function skipIds(ids: number[]) {
+    if (busy) return;
+    if (ids.length === 0) return;
+    busy = true;
+    try {
+      const { skipped } = await api<SkipResponse>('/queue/skip', {
+        method: 'POST',
+        body: JSON.stringify({ ids }),
+      });
+      addToast('success', `건너뛰기 ${skipped}건 처리 (해당 단계 제외)`);
+      await afterAction();
+    } catch {
+      addToast('error', '건너뛰기 요청 실패');
+    } finally {
+      busy = false;
+    }
+  }
+
+  /** After a retry/skip: reload the failure list and the queue overview in parallel. */
+  async function afterAction() {
+    const reloads = [loadFailures(), refreshQueueOverview()];
+    await Promise.all(reloads);
+  }
+
+  // ─── 머신 레인 (C2) — mainNodes 를 머신별로 그룹 + 머신 카드(state/처리율) 결합 ───
+  const machineByKey = $derived(
+    new Map<FlowMachine, MachineOverview>(overview.machines.map((m) => [m.key as FlowMachine, m])),
+  );
+  const LANE_ORDER: FlowMachine[] = ['gpu', 'macmini', 'macbook'];
+  const lanes = $derived(
+    LANE_ORDER.map((key) => ({
+      key,
+      meta: MACHINE_META[key],
+      card: machineByKey.get(key) ?? null,
+      nodes: mainNodes.filter((n) => n.def.machine === key),
+    })),
+  );
+
+  // 요약 오프로드 분담 — 맥미니 vs 맥북 (A-1 summarize_by_machine)
+  const split = $derived(overview.summarize_by_machine);
+  const splitTotal1h = $derived(Math.max(1, split.macmini.done_1h + split.macbook.done_1h));
+  const macbookSharePct = $derived(Math.round((split.macbook.done_1h / splitTotal1h) * 100));
+  // 맥북이 요약을 실제로 가져가는 중인가 (합류 표식 게이트)
+  const offloadActive = $derived(split.macbook.done_1h > 0);
+
+  // ─── 지배 백로그 = 요약. 정직 ETA(유입 차감) — summarize_eta ───
+  const eta = $derived(overview.summarize_eta);
+  // 정직 ETA 라벨: eta_minutes null = 유입이 소화를 앞섬(소진 불가)
+  const honestEtaLabel = $derived(
+    eta.pending === 0
+      ? '비어 있음'
+      : eta.eta_minutes != null
+        ? etaShort(eta.eta_minutes)
+        : '소진 불가',
+  );
+  const honestEtaWarn = $derived(eta.pending > 0 && eta.eta_minutes == null);
+
+  /**
+   * Per-node honest ETA label based on the net drain rate (done − created per hour).
+   * Returns '한가' when nothing is pending, a formatted ETA when the backlog shrinks,
+   * '유입 우세' when inflow exceeds drain, and null when the two rates are equal.
+   */
+  function netEtaLabel(n: NodeStats): string | null {
+    const { pending, done1h, created1h } = n;
+    if (pending === 0) return '한가';
+    const netPerHour = done1h - created1h;
+    if (netPerHour > 0) {
+      const minutes = Math.round((pending / netPerHour) * 60);
+      return etaShort(minutes);
+    }
+    return created1h > done1h ? '유입 우세' : null;
+  }
+
+  // ─── 신선도 (B-4) — '갱신 N초 전' + stale 경고 (폴링 60s) ───
+  let now = $state(Date.now());
+  $effect(() => {
+    const id = setInterval(() => (now = Date.now()), 1000);
+    return () => clearInterval(id);
+  });
+  const ageSec = $derived(
+    $queueUpdatedAt != null ? Math.max(0, Math.round((now - $queueUpdatedAt) / 1000)) : null,
+  );
+  const stale = $derived(ageSec != null && ageSec > 90);
+  const freshLabel = $derived(
+    ageSec == null
+      ? '갱신 대기'
+      : ageSec < 60
+        ? `갱신 ${ageSec}초 전`
+        : `갱신 ${Math.round(ageSec / 60)}분 전`,
+  );
+
+  // ─── 24h burndown (C3) — summarize inflow vs. done, plus a marker at the busiest
+  // done bucket (presumed point where the MacBook joined the drain) ───
+  // Produces SVG point strings for a 300×64 viewBox, or null when there is no trend data.
+  const burn = $derived.by(() => {
+    const t = overview.trend_24h;
+    if (!t || t.length === 0) return null;
+    // Shared vertical scale for both series; at least 1 to avoid division by zero.
+    const max = Math.max(1, ...t.map((b) => Math.max(b.inflow, b.done)));
+    const w = 300;
+    const h = 64;
+    const pad = 4;
+    const step = w / Math.max(1, t.length - 1);
+    // Map a value into [pad, h - pad]: 0 sits `pad` above the bottom edge and the peak
+    // sits `pad` below the top edge, so both extremes stay inside the viewBox.
+    const y = (v: number) => (h - pad - (v / max) * (h - 2 * pad)).toFixed(1);
+    const line = (sel: (b: (typeof t)[number]) => number) =>
+      t.map((b, i) => `${(i * step).toFixed(1)},${y(sel(b))}`).join(' ');
+    const doneLine = line((b) => b.done);
+    // Closed polygon under the done line, anchored to the bottom edge.
+    const area = `0,${h} ${doneLine} ${w.toFixed(1)},${h}`;
+    // Marker bucket = first bucket with the maximum `done`.
+    let mi = 0;
+    t.forEach((b, i) => {
+      if (b.done > t[mi].done) mi = i;
+    });
+    return {
+      w,
+      h,
+      area,
+      doneLine,
+      inflowLine: line((b) => b.inflow),
+      markX: (mi * step).toFixed(1),
+      markHour: t[mi].hour,
+      markDone: t[mi].done,
+      peak: max,
+    };
+  });
+
+  // Colour class for a machine's status dot: active → success, deferred → warning,
+  // anything else → faint.
+  function dotClass(state: string): string {
+    switch (state) {
+      case 'active':
+        return 'bg-success';
+      case 'deferred':
+        return 'bg-warning';
+      default:
+        return 'bg-faint';
+    }
+  }
+</script>
+
+<div class="mt-5">
+  <!-- 헤더: 타이틀 + 신선도 + 실패 합계 -->
+  <div class="flex items-center justify-between gap-3 mb-3">
+    <div class="text-[11px] font-bold text-dim uppercase tracking-wider">처리 머신</div>
+    <div class="flex items-center gap-3">
+      {#if totalFailed > 0}
+        <button
+          class="text-[11px] font-semibold text-error hover:underline cursor-pointer"
+          onclick={openFailures}
+        >실패 {totalFailed}건 처리</button>
+      {/if}
+      <span class="flex items-center gap-1.5 text-[10px] tabular-nums {stale ? 'text-warning' : 'text-faint'}" title="60초 폴링">
+        <span class="w-1.5 h-1.5 rounded-full {stale ? 'bg-warning' : 'bg-success'}"></span>
+        {freshLabel}{#if stale} · 갱신 지연{/if}
+      </span>
+    </div>
+  </div>
+
+  <!-- 지배 백로그 스트립 (요약) + 정직 ETA -->
+  <div class="flex items-center flex-wrap gap-x-3 gap-y-1 bg-surface border border-warning/50 rounded-card px-3.5 py-2 mb-3">
+    <span class="text-[9px] font-bold text-warning border border-warning/60 rounded-full px-2 py-px">지배 백로그</span>
+    <span class="text-xs font-bold text-text">요약</span>
+    <span class="text-[11px] text-dim tabular-nums">대기 <b class="text-text">{eta.pending.toLocaleString()}</b> · 순소화 <b class="text-text">{formatRate(eta.done_rate_1h)}</b>/h · 유입 {formatRate(eta.inflow_rate_1h)}/h</span>
+    <span class="ml-auto flex items-center gap-1.5 border rounded-full px-2.5 py-0.5 {honestEtaWarn ? 'border-warning text-warning' : 'border-accent text-accent'}">
+      <span class="text-[10px] font-semibold">정직 ETA</span>
+      <span class="text-xs font-bold tabular-nums">{honestEtaLabel}</span>
+    </span>
+  </div>
+
+  <!-- 머신 레인 (누가 일하나 + 요약 오프로드) -->
+  <div class="grid gap-2 mb-3">
+    {#each lanes as lane (lane.key)}
+      <div class="bg-surface border border-default rounded-card px-3.5 py-2.5">
+        <div class="flex items-center gap-2 flex-wrap mb-2">
+          <span class="w-2 h-2 rounded-full shrink-0 {dotClass(lane.card?.state ?? 'idle')}"></span>
+          <span class="text-[9px] font-bold rounded px-1.5 py-px mtag-{lane.key}">{lane.meta.label}</span>
+          <span class="text-[10px] text-faint font-mono">{lane.meta.model}</span>
+          <span class="text-[11px] text-dim tabular-nums ml-1">{formatRate(lane.card?.done_1h ?? 0)}/h</span>
+          {#if lane.key === 'macbook' && (lane.card?.deferred_pending ?? 0) > 0}
+            <span class="text-[10px] font-semibold text-warning tabular-nums">보류 {lane.card?.deferred_pending}</span>
+          {/if}
+          {#if lane.card?.state === 'deferred'}
+            <span class="text-[9px] text-warning">잠듦 — 요약은 맥미니로 복귀</span>
+          {/if}
+        </div>
+        <div class="flex items-stretch gap-1.5 flex-wrap">
+          {#each lane.nodes as n (n.def.key)}
+            {@const idle = n.pending + n.processing + n.doneToday + n.failed === 0}
+            <button
+              class="relative text-left rounded-lg border px-2.5 py-1.5 transition-colors cursor-pointer hover:bg-surface-hover min-w-[96px]
+                {idle ? 'border-dashed border-default opacity-55' : n.inflowDominant ? 'border-warning' : 'border-default'}
+                {selected === n.def.key ? 'node-sel' : ''}"
+              onclick={() => toggleNode(n.def.key)}
+              title="{n.def.label} — 클릭하면 상세"
+            >
+              {#if n.failed > 0}
+                <span class="absolute -top-1.5 -right-1 text-[9px] font-extrabold bg-error text-white rounded-full px-1.5">{n.failed}</span>
+              {/if}
+              <div class="flex items-center gap-1 text-[11px] font-semibold text-text whitespace-nowrap">
+                {n.def.label}
+                {#if n.processing > 0}<span class="inline-block w-1.5 h-1.5 rounded-full bg-accent animate-pulse"></span>{/if}
+              </div>
+              <div class="text-sm font-extrabold tabular-nums leading-tight text-text">{n.pending.toLocaleString()}<span class="text-[9px] text-faint font-normal ml-0.5">대기</span></div>
+              <div class="text-[9px] text-dim tabular-nums whitespace-nowrap">{formatRate(n.done1h)}/h · 오늘 {n.doneToday.toLocaleString()}</div>
+              {#if n.def.key === 'summarize'}
+                <div class="mt-1 h-1 w-full rounded-full overflow-hidden flex" title="맥미니 {split.macmini.done_1h}/h · 맥북 {split.macbook.done_1h}/h">
+                  <span class="block h-full mtag-macmini-bar" style="width:{100 - macbookSharePct}%"></span>
+                  <span class="block h-full mtag-macbook-bar" style="width:{macbookSharePct}%"></span>
+                </div>
+                <div class="text-[9px] text-faint tabular-nums whitespace-nowrap mt-0.5">맥미니 {split.macmini.done_1h} · 맥북 {split.macbook.done_1h}/h</div>
+              {/if}
+            </button>
+          {/each}
+          {#if lane.key === 'macbook' && offloadActive}
+            <button
+              class="text-left rounded-lg border border-dashed border-warning/50 px-2.5 py-1.5 cursor-pointer hover:bg-surface-hover min-w-[96px]"
+              onclick={() => toggleNode('summarize')}
+              title="맥북이 요약을 맥미니에서 가져와 처리 중"
+            >
+              <div class="flex items-center gap-1 text-[11px] font-semibold text-text whitespace-nowrap">요약 합류 <span class="text-[8px] font-bold text-warning">OFFLOAD</span></div>
+              <div class="text-sm font-extrabold tabular-nums leading-tight text-text">{split.macbook.done_1h}<span class="text-[9px] text-faint font-normal ml-0.5">/h</span></div>
+              <div class="text-[9px] text-dim tabular-nums whitespace-nowrap">요약의 {macbookSharePct}% 담당</div>
+            </button>
+          {/if}
+        </div>
+      </div>
+    {/each}
+  </div>
+
+  <!-- 번다운 / ETA 패널 -->
+  {#if burn}
+    <div class="bg-surface border border-default rounded-card px-3.5 py-3 mb-1">
+      <div class="flex items-center gap-2 mb-2">
+        <span class="text-[11px] font-bold text-text">요약 백로그 24시간</span>
+        <span class="text-[9px] text-faint">유입(회색) vs 소화(녹색)</span>
+        {#if offloadActive}<span class="text-[9px] text-warning ml-auto">맥북 합류 {burn.markHour} — 소화 급증</span>{/if}
+      </div>
+      <svg viewBox="0 0 {burn.w} {burn.h}" class="block w-full" style="height:64px" preserveAspectRatio="none" role="img" aria-label="요약 백로그 24시간 번다운">
+        <polygon points={burn.area} fill="currentColor" class="text-success" opacity="0.12" />
+        <polyline points={burn.inflowLine} fill="none" stroke="currentColor" stroke-width="1.2" class="text-faint" />
+        <polyline points={burn.doneLine} fill="none" stroke="currentColor" stroke-width="1.6" class="text-success" />
+        {#if offloadActive}
+          <line x1={burn.markX} y1="0" x2={burn.markX} y2={burn.h} stroke="currentColor" stroke-width="1" stroke-dasharray="2 2" class="text-warning" opacity="0.7" />
+        {/if}
+      </svg>
+      <div class="flex flex-wrap gap-x-4 gap-y-1 mt-2 pt-2 border-t border-default text-[10px] text-dim tabular-nums">
+        {#each mainNodes.filter((n) => n.pending > 0 && n.def.key !== 'summarize') as n (n.def.key)}
+          <span class="whitespace-nowrap">{n.def.label} 대기 <b class="text-text">{n.pending.toLocaleString()}</b>{#if netEtaLabel(n)} · <span class="text-accent font-semibold">{netEtaLabel(n)}</span>{/if}</span>
+        {/each}
+      </div>
+    </div>
+  {/if}
+
+  <!-- 보조 라인 -->
+  <p class="text-[10px] text-faint mt-1.5 tabular-nums">
+    {#each auxActive as n, i (n.def.key)}
+      {i > 0 ? ' · ' : '보조: '}{n.def.label}({n.def.engine}) 대기 {n.pending.toLocaleString()} · {formatRate(n.done1h)}/h{n.failed > 0 ? ` · 실패 ${n.failed}` : ''}
+    {/each}
+    {#if auxIdle.length > 0}
+      {auxActive.length > 0 ? ' — ' : ''}한가: {auxIdle.map((n) => n.def.label).join(' · ')}
+    {/if}
+    — 뉴스 등 일부 소스는 분류/추출을 건너뜀 (흐름 그림은 대표 경로)
+  </p>
+
+  <!-- 상세 패널 (노드 클릭) -->
+  {#if selectedNode}
+    <div class="border rounded-card mt-3 overflow-hidden bg-surface detail-frame">
+      <div class="flex items-center gap-2.5 px-4 py-2.5 text-xs font-bold detail-head">
+        {selectedNode.def.label} — {selectedNode.def.engine}
+        <span class="text-[10px] font-mono font-medium text-dim bg-surface border border-default rounded px-1.5">{selectedNode.def.sub} · {MACHINE_META[selectedNode.def.machine].label}</span>
+        <button class="ml-auto text-[11px] text-dim font-normal cursor-pointer hover:text-text" onclick={() => (selected = null)}>닫기</button>
+      </div>
+      <div class="px-4 pb-3.5">
+        <div class="grid grid-cols-2 md:grid-cols-4 gap-2.5 my-2.5">
+          <div class="bg-bg border border-default rounded-card px-3 py-2">
+            <div class="text-[9px] text-faint uppercase tracking-wide">대기</div>
+            <div class="text-lg font-extrabold tabular-nums text-text">{selectedNode.pending.toLocaleString()}</div>
+          </div>
+          <div class="bg-bg border border-default rounded-card px-3 py-2">
+            <div class="text-[9px] text-faint uppercase tracking-wide">처리율 (1h)</div>
+            <div class="text-lg font-extrabold tabular-nums text-text">{formatRate(selectedNode.done1h)}<span class="text-[11px] text-dim font-semibold">/h</span></div>
+          </div>
+          <div class="bg-bg border border-default rounded-card px-3 py-2">
+            <div class="text-[9px] text-faint uppercase tracking-wide">오늘 완료</div>
+            <div class="text-lg font-extrabold tabular-nums text-text">{selectedNode.doneToday.toLocaleString()}</div>
+          </div>
+          <div class="bg-bg border border-default rounded-card px-3 py-2">
+            <div class="text-[9px] text-faint uppercase tracking-wide">소진 예상</div>
+            <div class="text-lg font-extrabold tabular-nums {selectedNode.inflowDominant ? 'text-warning' : 'text-accent'}">
+              {#if selectedNode.inflowDominant}유입 우세{:else if selectedNode.etaMinutes != null}{etaShort(selectedNode.etaMinutes)}{:else if selectedNode.pending === 0}한가{:else}—{/if}
+            </div>
+          </div>
+        </div>
+        {#if selectedNode.perStage.length > 1}
+          {#each selectedNode.perStage as row (row.stage)}
+            <div class="flex items-center gap-2.5 py-1.5 border-t border-default text-xs">
+              <span class="font-semibold text-text min-w-[72px]">{flowStageLabel(row.stage)}</span>
+              <span class="ml-auto text-dim tabular-nums">
+                대기 <strong class="text-text">{row.pending.toLocaleString()}</strong>
+                · {formatRate(row.done_1h)}/h · 오늘 {row.done_today.toLocaleString()}
+                {#if row.failed > 0}· <span class="text-error font-semibold">실패 {row.failed}</span>{/if}
+              </span>
+            </div>
+          {/each}
+        {/if}
+        <div class="text-[11px] text-dim border-t border-dashed border-default mt-2 pt-2 tabular-nums">
+          {#if selectedNode.oldestAgeSec != null && selectedNode.oldestAgeSec > 600}
+            가장 오래 기다린 항목 {formatAgeSec(selectedNode.oldestAgeSec)}
+          {/if}
+          {#each nodeCurrent(selectedNode.def) as c, i (c.document_id + c.stage)}
+            {i === 0 && !(selectedNode.oldestAgeSec != null && selectedNode.oldestAgeSec > 600) ? '' : ' · '}지금: {c.title} ({flowStageLabel(c.stage)})
+          {/each}
+          {#if selectedNode.failed > 0}
+            · <button class="text-error font-semibold cursor-pointer hover:underline" onclick={openFailures}>실패 {selectedNode.failed}건 처리</button>
+          {/if}
+        </div>
+      </div>
+    </div>
+  {/if}
+
+  <!-- 실패 처리 드로어 -->
+  {#if failOpen}
+    <div class="border border-error/40 rounded-card mt-3 overflow-hidden bg-surface">
+      <div class="flex items-center gap-2.5 px-4 py-2.5 bg-error/5 text-xs font-bold text-text">
+        실패 처리
+        <span class="text-[10px] font-semibold text-error">영구 실패 {failItems.length}건 — 자동 재시도 3회 소진, 수동 조치 대기</span>
+        <button class="ml-auto text-[11px] text-dim font-normal cursor-pointer hover:text-text" onclick={() => (failOpen = false)}>닫기</button>
+      </div>
+      {#if failLoading}
+        <p class="text-xs text-dim text-center py-4">불러오는 중…</p>
+      {:else if failItems.length === 0}
+        <p class="text-xs text-dim text-center py-4">영구 실패 항목 없음</p>
+      {:else}
+        {#each failGroups as g (g.key)}
+          <div class="px-4 py-2.5 border-t border-default">
+            <div class="flex items-center gap-2 flex-wrap text-xs font-bold text-text mb-1">
+              {flowStageLabel(g.stage)} {g.items.length}건
+              <span class="text-[10px] font-mono font-medium text-error bg-error/10 rounded px-1.5 py-px">{g.pattern}{g.items[0]?.error_message && g.items[0].error_message.length > 36 ? '…' : ''}</span>
+            </div>
+            {#each expanded[g.key] ? g.items : g.items.slice(0, 4) as it (it.id)}
+              <div class="flex items-center gap-2.5 py-1 border-t border-dashed border-default/60 text-xs">
+                <span class="flex-1 min-w-0 truncate text-text" title={it.title}>{it.title}</span>
+                <span class="text-[10px] font-mono text-faint shrink-0 tabular-nums">시도 {it.attempts}/{it.max_attempts}</span>
+                <span class="text-[10px] font-mono text-error shrink-0 max-w-[260px] truncate" title={it.error_message ?? ''}>{it.error_message ?? ''}</span>
+                <button class="text-[10px] font-bold border border-accent text-accent rounded px-2 py-0.5 shrink-0 cursor-pointer hover:bg-accent/10 disabled:opacity-40" disabled={busy} onclick={() => retryIds([it.id])}>재시도</button>
+                <button class="text-[10px] font-bold border border-default text-faint rounded px-2 py-0.5 shrink-0 cursor-pointer hover:bg-surface-hover disabled:opacity-40" disabled={busy} onclick={() => skipIds([it.id])}>건너뛰기</button>
+              </div>
+            {/each}
+            {#if g.items.length > 4 && !expanded[g.key]}
+              <button class="text-[10px] text-dim cursor-pointer hover:text-text mt-1" onclick={() => (expanded = { ...expanded, [g.key]: true })}>… 외 {g.items.length - 4}건 펼치기</button>
+            {/if}
+            {#if g.items.length > 1}
+              <div class="flex gap-2 mt-1.5">
+                <button class="text-[10px] font-bold border border-accent text-accent rounded px-2.5 py-0.5 cursor-pointer hover:bg-accent/10 disabled:opacity-40" disabled={busy} onclick={() => retryIds(g.items.map((x) => x.id))}>그룹 전체 재시도 ({g.items.length})</button>
+                <button class="text-[10px] font-bold border border-default text-faint rounded px-2.5 py-0.5 cursor-pointer hover:bg-surface-hover disabled:opacity-40" disabled={busy} onclick={() => skipIds(g.items.map((x) => x.id))}>그룹 전체 건너뛰기</button>
+              </div>
+            {/if}
+          </div>
+        {/each}
+        <p class="text-[10px] text-faint px-4 py-2 border-t border-default">
+          재시도 = 시도 횟수 리셋 후 큐 재진입 (자동 재시도 3회 새로 부여) · 건너뛰기 = 이 단계 완료 처리(후속 단계 연쇄 없음, 감사 마킹) · 같은 오류가 반복되는 항목(빈 텍스트 등)은 건너뛰기 권장
+        </p>
+      {/if}
+    </div>
+  {/if}
+</div>
+
+<style>
+  /* 머신 색 — 디자인 토큰 외 3색 (gpu 청/macmini 보라/macbook 황) — 이 컴포넌트 한정 */
+  .mtag-gpu { background: #e7eef6; color: #3b6ea5; }
+  .mtag-macmini { background: #efe9f7; color: #8a5fbf; }
+  .mtag-macbook { background: #f7eedd; color: #b07a10; }
+  /* 요약 오프로드 분담 막대 채움 (맥미니 보라 / 맥북 황) */
+  .mtag-macmini-bar { background: #8a5fbf; }
+  .mtag-macbook-bar { background: #b07a10; }
+  .node-sel { outline: 2px solid #3b6ea5; outline-offset: 1px; }
+  .detail-frame { border-color: #3b6ea5; }
+  .detail-head { background: #e7eef6; }
+</style>
@@ -0,0 +1,106 @@
+<script lang="ts">
+  // 처리 현황 드로어 (안6 라이트) — 전 페이지 상태 스트립 클릭 시 우측에서 열림.
+  // 머신 미니카드 3 + ETA 한 줄 + 실패 합계 + 홈 링크 축약본. 상세는 홈 보드가 담당.
+  // 데이터 = queueOverview store 공유 (60s 폴링, 실패 시 null → 안내문으로 degrade).
+  // 열림 상태는 uiState 단일 drawer slot('queue') — 사이드바 드로어와 동시 오픈 차단.
+  import { X } from 'lucide-svelte';
+  import { ui } from '$lib/stores/uiState.svelte';
+  import { queueOverview } from '$lib/stores/queueOverview';
+  import {
+    MACHINE_STATE_LABEL, machineChipClass, machineDotClass, formatRate, etaPhrase,
+  } from '$lib/utils/queueDisplay';
+  import IconButton from '$lib/components/ui/IconButton.svelte';
+
+  let open = $derived(ui.isDrawerOpen('queue'));
+  let data = $derived($queueOverview);
+
+  // Close this drawer by delegating to the shared ui store (ui.closeDrawer()).
+  function close() {
+    ui.closeDrawer();
+  }
+
+  // Close on ESC — harmless (idempotent) even if the layout-wide handler
+  // (ui.handleEscape) also fires. When the modal stack is non-empty the modal takes
+  // priority, matching the global precedence.
+  function onWindowKeydown(e: KeyboardEvent) {
+    if (e.key !== 'Escape') return;
+    if (!open) return;
+    if (ui.modalStack.length !== 0) return;
+    close();
+  }
+</script>
+
+<svelte:window onkeydown={onWindowKeydown} />
+
+{#if open}
+  <div class="fixed inset-0 z-drawer">
+    <!-- 스크림 — 클릭 시 닫기 -->
+    <button
+      type="button"
+      onclick={close}
+      class="absolute inset-0 bg-scrim transition-opacity"
+      aria-label="드로어 닫기"
+    ></button>
+
+    <!-- 패널 — div + role="dialog" (aside 는 interactive role 불가, a11y 경고) -->
+    <div
+      role="dialog"
+      aria-modal="true"
+      aria-label="처리 현황"
+      class="absolute right-0 top-0 bottom-0 w-rail max-w-full bg-sidebar shadow-xl overflow-y-auto"
+    >
+      <div class="flex items-center justify-between px-4 h-12 border-b border-default">
+        <span class="text-sm font-bold text-text">처리 현황</span>
+        <IconButton icon={X} size="sm" aria-label="닫기" onclick={close} />
+      </div>
+
+      <div class="p-4 space-y-3">
+        {#if data}
+          <!-- 머신 미니카드 3 -->
+          {#each data.machines as m (m.key)}
+            <div class="bg-surface border border-default rounded-lg px-3.5 py-2.5">
+              <div class="flex items-center justify-between gap-2">
+                <span class="flex items-center gap-2 text-[13px] font-semibold text-text min-w-0">
+                  <span class="w-2 h-2 rounded-full shrink-0 {machineDotClass(m.state)}"></span>
+                  <span class="truncate">{m.label}</span>
+                </span>
+                <span class="text-[10px] font-bold rounded-full px-2 py-0.5 shrink-0 {machineChipClass(m.state)}">
+                  {MACHINE_STATE_LABEL[m.state]}
+                </span>
+              </div>
+              <div class="text-[11px] text-dim mt-1 tabular-nums">
+                대기 <strong class="text-text">{m.pending.toLocaleString()}</strong>
+                · 오늘 <strong class="text-text">{m.done_today.toLocaleString()}</strong>건 처리
+              </div>
+            </div>
+          {/each}
+
+          <!-- ETA 한 줄 (안5 라이트 — 추정치) -->
+          <div
+            class="text-[11px] text-dim leading-relaxed tabular-nums"
+            title="현재 페이스 기반 추정치 — 유입 변동 시 달라질 수 있습니다"
+          >
+            요약 대기 <strong class="text-text">{data.summarize_eta.pending.toLocaleString()}건</strong>
+            — 소화 {formatRate(data.summarize_eta.done_rate_1h)}/h
+            · 유입 {formatRate(data.summarize_eta.inflow_rate_1h)}/h
+            {#if data.summarize_eta.eta_minutes != null}
+              · <span class="text-accent font-semibold">{etaPhrase(data.summarize_eta.eta_minutes)}</span>
+            {:else}
+              · 유입 우세(백필 중)
+            {/if}
+          </div>
+
+          <!-- 실패 합계 -->
+          {#if data.totals.failed > 0}
+            <div class="text-[11px] font-semibold text-error bg-error/10 rounded-md px-2.5 py-1.5 tabular-nums">
+              실패 {data.totals.failed.toLocaleString()}건 — 확인 필요
+            </div>
+          {/if}
+        {:else}
+          <p class="text-xs text-dim">처리 현황을 불러오지 못했습니다.</p>
+        {/if}
+
+        <a
+          href="/"
+          onclick={close}
+          class="block text-xs text-accent font-semibold hover:underline pt-1"
+        >홈에서 자세히 →</a>
+      </div>
+    </div>
+  </div>
+{/if}
@@ -43,14 +43,17 @@
  {@const open = selectedId === s.chunk_id}
  {@const active = activeKey != null && activeKey === s.chunk_id}
  {@const typeLabel = sectionTypeLabel(s.section_type)}
+  {@const depth = Math.max(0, (s.level ?? 1) - 1)}
  <li>
    <button
      type="button"
      onclick={() => { toggle(item); onJump?.(s.chunk_id); }}
      aria-expanded={open}
      aria-current={active ? 'true' : undefined}
+      style="padding-left:{8 + depth * 13}px"
      class={[
-        'w-full text-left px-2 py-1.5 rounded-md text-xs flex items-start gap-1.5 transition-colors border-l-2',
+        'w-full text-left pr-2 py-1.5 rounded-md text-xs flex items-start gap-1.5 transition-colors border-l-2',
+        depth > 0 ? 'text-[11px]' : '',
        open ? 'bg-surface-active text-text border-accent' : active ? 'bg-surface text-accent-hover border-accent' : 'text-dim hover:bg-surface hover:text-text border-transparent',
      ].join(' ')}
    >
@@ -2,7 +2,7 @@
  import { page } from '$app/stores';
  import { goto } from '$app/navigation';
  import { api } from '$lib/api';
-  import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck } from 'lucide-svelte';
+  import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck, MessageCircle } from 'lucide-svelte';

  let tree = $state([]);
  let loading = $state(true);
@@ -229,6 +229,16 @@
        공부
      </span>
    </a>
+    <a
+      href="/chat"
+      class="flex items-center justify-between px-3 py-2 rounded-md text-sm transition-colors
+        {$page.url.pathname.startsWith('/chat') ? 'bg-accent/15 text-accent' : 'text-text hover:bg-surface'}"
+    >
+      <span class="flex items-center gap-2">
+        <MessageCircle size={16} />
+        이드
+      </span>
+    </a>
    <a
      href="/inbox"
      class="flex items-center justify-between px-3 py-2 rounded-md text-sm text-text hover:bg-surface transition-colors"
@@ -0,0 +1,31 @@
+<!--
+  EidEvidenceCard — evidence card for Eid chat deep (search) answers (ds-eid-ask-absorb P1).
+
+  ReactResult.sources = {id, doc_id, title, score} has no citation number n (unlike the /ask
+  Citation), so rows are numbered by order ([1],[2]...). Stage-1 card = title, source, score (snippets
+  are not in react_loop _result_payload items_src — stage-2 candidate). Collapsible <details> keeps the
+  chat flow. Design tokens only (CLAUDE.md lint:tokens). Keyed by index: `id ?? i` can collide.
+-->
+<script lang="ts">
+  type EidSource = { id?: number; doc_id?: number; title?: string; score?: number };
+  let { sources, partial = false }: { sources: EidSource[]; partial?: boolean } = $props();
+</script>
+
+{#if sources.length}
+  <details class="mt-2 rounded-lg border border-default bg-surface text-xs max-w-[85%] sm:max-w-[75%]">
+    <summary class="cursor-pointer px-3 py-2 text-dim hover:text-text select-none font-semibold">
+      근거 {sources.length}개{partial ? ' · 부분 답변 (확정 근거 부족)' : ''}
+    </summary>
+    <ul class="px-3 pb-2.5 flex flex-col gap-1.5">
+      {#each sources as src, i (i)}
+        <li class="flex items-start gap-2">
+          <span class="text-accent font-bold shrink-0">[{i + 1}]</span>
+          <span class="flex-1 min-w-0 text-text break-words">{src.title || `문서 ${src.doc_id ?? '?'}`}</span>
+          {#if typeof src.score === 'number'}
+            <span class="text-faint shrink-0 tabular-nums">{src.score.toFixed(2)}</span>
+          {/if}
+        </li>
+      {/each}
+    </ul>
+  </details>
+{/if}
@@ -0,0 +1,8 @@
+/**
+ * Shared constants for the Eid chat (/chat) (PR-Eid-Chat).
+ *
+ * localStorage history key — kept as a single constant so that the chat surface's save/restore
+ * (routes/chat/+page.svelte) and logout's history removal (stores/auth.ts) address the same key
+ * (consistent with the no-body-logging posture: no conversation text stays in the browser after logout).
+ */
+export const EID_CHAT_STORAGE_KEY = 'eid_chat:v1';
@@ -1,5 +1,6 @@
 import { writable } from 'svelte/store';
 import { api, setAccessToken } from '$lib/api';
+import { EID_CHAT_STORAGE_KEY } from '$lib/eidChat';

 interface User {
  id: number;
@@ -39,6 +40,14 @@ export async function logout() {
  setAccessToken(null);
  user.set(null);
  isAuthenticated.set(false);
+  // 본문 무로깅 posture 정합 — 로그아웃 시 이드 대화 이력도 브라우저에서 제거
+  if (typeof window !== 'undefined') {
+    try {
+      window.localStorage.removeItem(EID_CHAT_STORAGE_KEY);
+    } catch {
+      // 이력 제거 실패가 logout 자체를 막지는 않음
+    }
+  }
 }

 export async function tryRefresh() {
@@ -0,0 +1,70 @@
+// 처리 큐 overview store — GET /api/queue/overview 를 60초 주기로 폴링.
+// system.ts 의 dashboardSummary 와 같은 구독 기반 패턴 (첫 subscribe 시 시작).
+//
+// 의도적으로 api() 헬퍼를 쓰지 않는다 — 폴링 경로의 401 이 refresh 실패 →
+// window.location='/login' 강제 logout 부수효과를 일으키면 안 됨 (eid 리뷰
+// finding 재발 방지). 백엔드 미배포(404)/401/네트워크 실패 전부 silent 하게
+// null 로 수렴하고, 소비자(스트립/보드/드로어)는 null 이면 스스로 숨는다.
+
+import { writable } from 'svelte/store';
+import { browser } from '$app/environment';
+import { getAccessToken } from '$lib/api';
+import type { QueueOverview } from '$lib/types/queue';
+
+const POLL_INTERVAL_MS = 60_000; // polling period: 60 seconds
+
+let pollHandle: ReturnType<typeof setInterval> | null = null; // active interval handle, null while not polling
+let subscriberCount = 0; // live subscribers of the overview store
+let inFlight: Promise<void> | null = null; // pending refresh, shared by concurrent callers
+
+// Time of the last successful refresh (epoch ms) — feeds the board's "updated N seconds ago"
+// freshness text and stale warning (ds-board-merged B-4). Not touched on failure, so the age keeps growing.
+const updatedAt = writable<number | null>(null);
+export const queueUpdatedAt = { subscribe: updatedAt.subscribe }; // read-only view: subscribe only
+
+const internal = writable<QueueOverview | null>(null, () => {
+  // Start notifier: begin polling when the subscriber count reaches one (browser only).
+  const isFirst = ++subscriberCount === 1;
+  if (isFirst && browser) {
+    void refreshQueueOverview();
+    pollHandle = setInterval(() => void refreshQueueOverview(), POLL_INTERVAL_MS);
+  }
+  return () => {
+    // Stop notifier: clear the interval once the count is back at zero.
+    if (--subscriberCount !== 0 || !pollHandle) return;
+    clearInterval(pollHandle);
+    pollHandle = null;
+  };
+});
+
+export const queueOverview = { subscribe: internal.subscribe };
+
+/** Lightweight fetch — every failure resolves to null (silent, non-blocking, no forced-logout path). */
+async function fetchOverview(): Promise<QueueOverview | null> {
+  try {
+    const token = getAccessToken();
+    // Attach the bearer token only when one is available.
+    const headers: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};
+    const res = await fetch('/api/queue/overview', { headers, credentials: 'include' });
+    return res.ok ? ((await res.json()) as QueueOverview) : null;
+  } catch {
+    // A thrown fetch or JSON parse failure collapses to null, like a non-ok response.
+    return null;
+  }
+}
+
+/** Manual / extra polling entry point — the home page calls it from its own 30s interval (concurrent fetches are coalesced). */
+export async function refreshQueueOverview(): Promise<void> {
+  if (!browser) return;
+  if (inFlight) return inFlight;
+  const run = async (): Promise<void> => {
+    try {
+      const overview = await fetchOverview();
+      internal.set(overview);
+      if (overview) updatedAt.set(Date.now()); // bump freshness on success only (failure = stays stale)
+    } finally {
+      inFlight = null; // let the next call start a new request
+    }
+  };
+  return (inFlight = run());
+}
@@ -3,7 +3,9 @@
 // (toast는 별도 store. drawer가 persistent inline panel(예: xl+ meta rail)일 때는
 //  여기 시스템 밖이다 — 그저 레이아웃의 일부.)

-type Drawer = { id: 'sidebar' | 'meta' } | null;
+// 'queue' = 처리 현황 드로어 (상태 스트립 클릭 시 우측) — 단일 slot 규칙 동일
+export type DrawerId = 'sidebar' | 'meta' | 'queue';
+type Drawer = { id: DrawerId } | null;
 type Modal = { id: string };

 class UIState {
@@ -11,14 +13,14 @@ class UIState {
  modalStack = $state<Modal[]>([]);

  // ── Drawer (단일 slot) ──────────────────────────────
-  openDrawer(id: 'sidebar' | 'meta') {
+  openDrawer(id: DrawerId) {
    // 새 drawer 열면 이전 drawer는 자동으로 사라진다 (단일 slot)
    this.drawer = { id };
  }
  closeDrawer() {
    this.drawer = null;
  }
-  isDrawerOpen(id: 'sidebar' | 'meta') {
+  isDrawerOpen(id: DrawerId) {
    return this.drawer?.id === id;
  }

@@ -0,0 +1,115 @@
+/**
+ * GET /api/queue/overview 응답 타입.
+ *
+ * Backend 는 병렬 트랙에서 구현 중 — 계약 고정 (feat/ds-processing-board).
+ * 필드 변경 시 양쪽 동시 수정 필수.
+ */
+
+export type MachineKey = 'gpu' | 'macmini' | 'macbook';
+
+/** Machine state — active (running) / deferred (on hold) / idle (waiting) */
+export type MachineState = 'active' | 'deferred' | 'idle';
+
+/** One document that a machine is processing right now */
+export interface MachineCurrentItem {
+  document_id: number;
+  title: string;
+  stage: string;
+}
+
+export interface MachineOverview {
+  key: MachineKey;
+  label: string;
+  state: MachineState;
+  /** Keys of the stages this machine handles (extract/classify/... — localized to Korean via the home STAGE_LABEL) */
+  stages: string[];
+  pending: number;
+  processing: number;
+  failed: number;
+  /** Items completed in the last hour (shown as an N/h throughput) */
+  done_1h: number;
+  done_today: number;
+  /** Deferred count — waiting for automatic resume, e.g. while the MacBook sleeps */
+  deferred_pending: number;
+  current: MachineCurrentItem[];
+}
+
+/** Summarize backlog ETA (proposal 5 lite) — an estimate; drifts when inflow changes */
+export interface SummarizeEta {
+  pending: number;
+  done_rate_1h: number;
+  inflow_rate_1h: number;
+  /** null = inflow outpaces processing (backfill in progress) — drain time cannot be estimated */
+  eta_minutes: number | null;
+}
+
+/** Hourly inflow vs. processed (24h summarize trend) */
+export interface TrendPoint {
+  hour: string;
+  inflow: number;
+  done: number;
+}
+
+/** Per-machine share of completed summarize work (makes the offload visible — ds-board-merged A-1) */
+export interface SummarizeByMachine {
+  macmini: { done_1h: number; done_today: number };
+  macbook: { done_1h: number; done_today: number };
+}
+
+export interface QueueTotals { // pending / processing / failed counters (presumably queue-wide sums — confirm against the backend)
+  pending: number;
+  processing: number;
+  failed: number;
+}
+
+export interface QueueStageRow { // counters for a single stage key
+  stage: string;
+  pending: number;
+  processing: number;
+  failed: number;
+  /** Completed in the last hour — input for node throughput and ETA (ds-board-engines-1) */
+  done_1h: number;
+  /** Created (inflow) in the last hour — input for the "inflow dominant" decision (ds-board-engines-1) */
+  created_1h: number;
+  done_today: number;
+  oldest_pending_age_sec: number | null;
+}
+
+export interface QueueOverview { // top-level shape of the GET /api/queue/overview response
+  machines: MachineOverview[];
+  summarize_eta: SummarizeEta;
+  summarize_by_machine: SummarizeByMachine;
+  trend_24h: TrendPoint[];
+  stages: QueueStageRow[];
+  totals: QueueTotals;
+}
+
+/** ─── Failure handling (ds-board-engines-1) — GET /api/queue/failed · POST /retry|/skip ─── */
+
+export interface FailedItem { // one failed queue entry
+  id: number;
+  stage: string;
+  document_id: number;
+  title: string;
+  attempts: number;
+  max_attempts: number;
+  error_message: string | null;
+  failed_at: string | null;
+}
+
+export interface FailedListResponse { // list payload; total presumably counts all failures, not just this page — confirm
+  items: FailedItem[];
+  total: number;
+}
+
+export interface RetryResponse { // outcome counters of a retry request
+  requested: number;
+  retried: number;
+  not_retried: number;
+}
+
+export interface SkipResponse { // outcome counters of a skip request
+  requested: number;
+  skipped: number;
+  not_skipped: number;
+}
@@ -69,6 +69,20 @@ test('collapseWindows: 연속 동일 heading window 만 dedupe, 순서 유지',
  );
 });

+test('[C2] collapseWindows: split-parent + window 들 → rail 1행, 대표=split-parent(char_start 보유)', () => {
+  const input = [
+    sec({ section_title: 'Article 5', heading_path: 'Article 5', node_type: 'chapter_split', is_leaf: false, char_start: 120 }),
+    sec({ section_title: 'Article 5', heading_path: 'Article 5', node_type: 'window', is_leaf: true, char_start: null }),
+    sec({ section_title: 'Article 5', heading_path: 'Article 5', node_type: 'window', is_leaf: true, char_start: null }),
+  ];
+  const out = collapseWindows(input);
+  assert.equal(out.length, 1, 'split-parent + 2 window → rail 1행');
+  // Representative = the split-parent (it carries char_start) → the jump works
+  assert.equal(out[0].section.node_type, 'chapter_split');
+  assert.equal(out[0].section.char_start, 120);
+  assert.equal(out[0].fragmentCount, 2, 'window 조각 수 = 2 (split-parent 자신 제외)');
+});
+
 test('groupOrFlat: 적은 그룹 + 낮은 기타% → group (5140-류)', () => {
  // 3 top segment × 4 = 12절, window 없음 → group_count 3, 기타 0%
  const sections: DocumentSection[] = [];
@@ -12,8 +12,10 @@ export interface DocumentSection {
  section_title: string | null;
  heading_path: string | null;
  level: number | null;
-  node_type: string | null; // 'window' | 'section_split' | null
+  node_type: string | null; // 'window' | 'chapter_split' | 'clause_split' | 'section_split' | null
  is_leaf: boolean;
+  /** md_content 내 heading offset(UTF-16). jump-target 만 값, window-child/preamble/Path A = null (Path B). */
+  char_start?: number | null;
  section_type: string | null;
  summary: string | null;
  confidence: number | null;
@@ -87,32 +89,38 @@ export function pathSegments(hp: string | null | undefined): string[] {
    .filter(Boolean);
 }

-/** 그룹 키: window/section_split(인공 조각) 또는 path 없음/깨짐 → OTHER. */
+/** 그룹 키: window/%_split(인공 조각·windowed split-parent) 또는 path 없음/깨짐 → OTHER. */
 function topSegment(s: DocumentSection): string {
-  if (s.node_type === 'window' || s.node_type === 'section_split') return OTHER;
+  if (s.node_type === 'window' || !!s.node_type?.endsWith('_split')) return OTHER;
  const segs = pathSegments(s.heading_path);
  return segs.length === 0 ? OTHER : segs[0];
 }

 /**
 * 서버 chunk_index 순서를 유지한 채(정렬 변경 금지), 연속된 동일 cleaned heading_path 의
- * node_type='window' 절을 1 항목으로 dedupe. 대표 = 첫 조각(요약 사용), fragmentCount 누적.
+ * node_type='window' 절을 1 항목으로 dedupe. fragmentCount = window 조각 수.
+ *
+ * [C2] g4-t2 가 split-parent(%_split, char_start 보유)를 그 window child 들보다 먼저(낮은 chunk_index)
+ *   노출하므로, 후속 window child 를 직전 split-parent(또는 legacy window 대표)에 흡수해 rail 1행으로 만든다.
+ *   merged row 의 대표 section = split-parent 여야 jump(anchorMap[split-parent char_start])가 성립한다 —
+ *   window-child(char_start NULL, anchorMap 부재)가 대표면 windowed section 이 점프 안 됨.
+ *   fragmentCount: split-parent 대표는 0 에서 시작(자신은 조각 아님) + 흡수 child 수 = 실제 조각 수;
+ *   legacy window 대표는 1 에서 시작(자신이 첫 조각).
 */
 export function collapseWindows(sections: DocumentSection[]): OutlineItem[] {
  const out: OutlineItem[] = [];
  for (const s of sections) {
    const prev = out[out.length - 1];
    const h = cleanHeading(s.heading_path);
-    if (
-      s.node_type === 'window' &&
+    const prevAbsorbs =
      prev &&
-      prev.section.node_type === 'window' &&
+      (prev.section.node_type === 'window' || !!prev.section.node_type?.endsWith('_split')) &&
      h !== '' &&
-      cleanHeading(prev.section.heading_path) === h
-    ) {
-      prev.fragmentCount += 1;
+      cleanHeading(prev.section.heading_path) === h;
+    if (s.node_type === 'window' && prevAbsorbs) {
+      prev!.fragmentCount += 1; // window child 흡수 — 대표(split-parent 우선)는 그대로 유지
    } else {
-      out.push({ section: s, fragmentCount: 1 });
+      out.push({ section: s, fragmentCount: s.node_type?.endsWith('_split') ? 0 : 1 });
    }
  }
  return out;
@@ -69,8 +69,9 @@ export function buildAnchorMap(
  let matched = 0;

  for (const s of sections) {
-    // window/section_split 조각은 자체 heading 없음(부모 제목 상속) → 건너뜀.
-    if (s.node_type === 'window' || s.node_type === 'section_split') continue;
+    // window 조각 + %_split parent(chapter_split/clause_split/section_split)는 string-match 대상 아님 →
+    // 건너뜀. (split-parent jump 은 Path B 의 BE char_start 로만 성립; Path A 폴백선 windowed 절 무점프=무회귀.)
+    if (s.node_type === 'window' || s.node_type?.endsWith('_split')) continue;
    let nt = norm(s.section_title);
    if (!nt && s.heading_path) {
      const last = s.heading_path.split('>').pop();
@@ -0,0 +1,121 @@
+// 처리 머신 보드 / 상태 스트립 / 드로어 공용 표시 헬퍼.
+// 상태 표현은 dot + 칩 (이모지 금지 원칙) — 토큰 클래스만 사용.
+
+import type { MachineState } from '$lib/types/queue';
+
+/** Korean display label for each machine state */
+export const MACHINE_STATE_LABEL: Record<MachineState, string> = {
+  active: '가동',
+  deferred: '보류',
+  idle: '대기',
+};
+
+/** Status dot color — active=success / deferred=warning / idle (anything else)=faint. */
+export function machineDotClass(state: MachineState): string {
+  const running = state === 'active';
+  const held = state === 'deferred';
+  return running ? 'bg-success' : held ? 'bg-warning' : 'bg-faint';
+}
+
+/** Status chip tone — active=accent / deferred=warning / idle (anything else)=dim. */
+export function machineChipClass(state: MachineState): string {
+  if (state === 'deferred') return 'bg-warning/10 text-warning';
+  const idleTone = 'bg-surface-hover text-faint';
+  return state === 'active' ? 'bg-accent/10 text-accent' : idleTone;
+}
+
+/** Throughput formatting — integers go through toLocaleString(), anything else gets exactly one decimal. */
+export function formatRate(n: number): string {
+  return !Number.isInteger(n) ? n.toFixed(1) : n.toLocaleString();
+}
+
+/** ETA minutes → "약 N분/N시간 후 소진 예상"; the unit rolls over on the rounded value, so 59.5+ min reads as 1 hour (estimate — the title hint is the caller's job). */
+export function etaPhrase(minutes: number): string {
+  if (Math.round(minutes) < 60) return `약 ${Math.max(1, Math.round(minutes))}분 후 소진 예상`;
+  const hours = minutes / 60;
+  const text = hours >= 10 ? String(Math.round(hours)) : String(Math.round(hours * 10) / 10);
+  return `약 ${text}시간 후 소진 예상`;
+}
+
+/** ETA minutes → short chip text ("약 12분" / "약 4.6시간" / 48h+ = "약 5.5일"); units roll over on the rounded value (no "약 60분" / "약 48시간"). */
+export function etaShort(minutes: number): string {
+  if (Math.round(minutes) < 60) return `약 ${Math.max(1, Math.round(minutes))}분`;
+  const hours = minutes / 60;
+  if (Math.round(hours) >= 48) {
+    const days = hours / 24;
+    return `약 ${days >= 10 ? Math.round(days) : Math.round(days * 10) / 10}일`;
+  }
+  const text = hours >= 10 ? String(Math.round(hours)) : String(Math.round(hours * 10) / 10);
+  return `약 ${text}시간`;
+}
+
+/** Elapsed seconds → "N분 전 / N시간 전 / N일 전"; the unit is picked from the rounded value, so it never prints "60분 전" or "24시간 전". */
+export function formatAgeSec(sec: number): string {
+  if (Math.round(sec / 60) < 60) return `${Math.max(1, Math.round(sec / 60))}분 전`;
+  if (Math.round(sec / 3600) < 24) return `${Math.round(sec / 3600)}시간 전`;
+  return `${Math.round(sec / 86400)}일 전`;
+}
+
+/* ─── Flow board static mapping (plan ds-board-engines-1) ─────────────────────
+ * stage → flow node / engine (model) / owning machine. The API only supplies the machine label
+ * and per-stage facts (contract: never expose raw model names); engine/model naming lives here only.
+ * ★ When a model/engine is swapped, edit this block alone (e.g. a Mac mini model swap).
+ */
+
+export type FlowMachine = 'gpu' | 'macmini' | 'macbook';
+
+export interface FlowNodeDef {
+  key: string;
+  /** Node display name */
+  label: string;
+  /** Stage keys to sum up (several = they share one engine) */
+  stages: string[];
+  machine: FlowMachine;
+  /** Engine/model display name (static on the FE — edit here when the model changes) */
+  engine: string;
+  /** Secondary caption (service/worker name) */
+  sub: string;
+}
+
+/** Main flow (document progress order). Source-specific skip paths (e.g. news) are not drawn — a limit of the simplification. */
+export const FLOW_NODES: FlowNodeDef[] = [
+  { key: 'extract', label: '추출', stages: ['extract'], machine: 'gpu', engine: 'Surya OCR', sub: 'ocr-service' },
+  { key: 'markdown', label: '마크다운', stages: ['markdown'], machine: 'gpu', engine: 'Marker', sub: 'marker-service' },
+  { key: 'classify', label: '분류', stages: ['classify'], machine: 'macmini', engine: 'Qwen3.6-27B', sub: 'classify + triage' },
+  { key: 'summarize', label: '요약', stages: ['summarize'], machine: 'macmini', engine: 'Qwen3.6-27B', sub: 'summarize' },
+  { key: 'chunkembed', label: '청크 · 임베딩', stages: ['chunk', 'embed'], machine: 'gpu', engine: 'TEI bge-m3', sub: 'text-embeddings-inference' },
+  { key: 'deep', label: '심층분석', stages: ['deep_summary'], machine: 'macbook', engine: 'Qwen3.6-27B', sub: 'deep_summary' },
+];
+
+/** Auxiliary nodes — outside the main flow (shown on the auxiliary line only while they have activity) */
+export const AUX_NODES: FlowNodeDef[] = [
+  { key: 'fulltext', label: '전문 수집', stages: ['fulltext'], machine: 'gpu', engine: 'Playwright', sub: 'playwright-fetcher' },
+  { key: 'stt', label: '전사', stages: ['stt'], machine: 'gpu', engine: 'Whisper', sub: 'stt-service' },
+  { key: 'util', label: '미리보기 · 썸네일', stages: ['preview', 'thumbnail'], machine: 'gpu', engine: '유틸', sub: 'ffmpeg' },
+];
+
+/** Machine strip metadata — the single place where model names are spelled out */
+export const MACHINE_META: Record<FlowMachine, { label: string; model: string }> = {
+  gpu: { label: 'GPU 서버', model: '특화 엔진' },
+  macmini: { label: '맥미니', model: 'Qwen3.6-27B-6bit · 24/7' },
+  macbook: { label: '맥북 M5 Max', model: 'Qwen3.6-27B · 야간 drain' },
+};
+
+/** Flow board stage labels (used for drawer / detail rows) */
+export const FLOW_STAGE_LABEL: Record<string, string> = {
+  extract: '추출',
+  classify: '분류',
+  summarize: '요약',
+  embed: '임베딩',
+  chunk: '청크',
+  preview: '미리보기',
+  stt: '전사',
+  thumbnail: '썸네일',
+  deep_summary: '심층분석',
+  markdown: '마크다운',
+  fulltext: '전문',
+};
+
+export function flowStageLabel(stage: string): string { // Korean label for a stage key; unknown keys pass through unchanged
+  return Object.prototype.hasOwnProperty.call(FLOW_STAGE_LABEL, stage) ? FLOW_STAGE_LABEL[stage] : stage; // own keys only — 'constructor' etc. must not resolve through Object.prototype
+}
@@ -0,0 +1,95 @@
+// resolveAnchorMap 회귀 테스트 (플랜 ds-outline-anchor-b5 g5-t1 / NEW-5 / B4 / C1).
+// 실행: node --test src/lib/utils/resolveAnchorMap.test.ts
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { resolveAnchorMap, isJumpTargetCandidate } from './resolveAnchorMap.ts';
+import { type DocumentSection } from './headingPath.ts';
+
+let _id = 0;
+function sec(p: Partial<DocumentSection>): DocumentSection {
+  // Fixture factory: fresh chunk_id per call, null / leaf defaults, caller overrides win.
+  _id += 1;
+  const defaults: DocumentSection = {
+    chunk_id: _id,
+    section_title: null,
+    heading_path: null,
+    level: null,
+    node_type: null,
+    is_leaf: true,
+    char_start: null,
+    section_type: null, summary: null, confidence: null,
+  };
+  return Object.assign(defaults, p);
+}
+
+const LONG = 'x'.repeat(500);
+
+test('trustBE=false → 무조건 string-match 폴백(fellBack=true)', () => {
+  const md = '# Alpha\nbody\n# Beta\nx';
+  const secs = [sec({ section_title: 'Alpha', char_start: 999 }), sec({ section_title: 'Beta', char_start: 999 })];
+  const r = resolveAnchorMap(md, secs, { trustBE: false });
+  assert.equal(r.fellBack, true);
+  // char_start (999) is ignored; string-match offsets are used. NOTE(review): every() passes vacuously if nothing matched.
+  assert.ok(Object.values(r.anchors).every((o) => o < 50));
+});
+
+test('trustBE=true + 모든 jump-target candidate char_start 보유 → BE 채택(fellBack=false)', () => {
+  const secs = [
+    sec({ section_title: 'A', char_start: 5, is_leaf: true }),
+    sec({ section_title: 'B', char_start: 42, is_leaf: true }),
+  ];
+  const r = resolveAnchorMap(LONG, secs, { trustBE: true }); // LONG (500 chars) keeps both offsets in range
+  assert.equal(r.fellBack, false);
+  assert.equal(r.anchors[secs[0].chunk_id], 5);
+  assert.equal(r.anchors[secs[1].chunk_id], 42);
+  assert.equal(r.matched, 2);
+});
+
+test('[NEW-5] windowed doc — window-child char_start NULL 이 폴백을 유발하지 않음(split-parent BE 사용)', () => {
+  const secs = [
+    sec({ section_title: 'Big', heading_path: 'Big', node_type: 'chapter_split', is_leaf: false, char_start: 10 }),
+    sec({ section_title: 'Big', heading_path: 'Big', node_type: 'window', is_leaf: true, char_start: null }),
+    sec({ section_title: 'Big', heading_path: 'Big', node_type: 'window', is_leaf: true, char_start: null }),
+  ];
+  const r = resolveAnchorMap(LONG, secs, { trustBE: true });
+  // A window-child NULL is not a candidate, so it does not trigger the fallback → BE is used and the split-parent jump survives
+  assert.equal(r.fellBack, false, 'window-child NULL 이 whole-doc 폴백을 유발하면 안 됨(NEW-5)');
+  assert.equal(r.anchors[secs[0].chunk_id], 10, 'split-parent char_start 가 BE 맵에 있어야 함');
+  // A window-child gets no anchor
+  assert.equal(r.anchors[secs[1].chunk_id], undefined);
+});
+
+test('[B4] non-PASS doc — jump-target candidate char_start NULL → string-match 폴백', () => {
+  const md = '# Gamma\nbody text here\n# Delta\nmore';
+  const secs = [
+    sec({ section_title: 'Gamma', is_leaf: true, char_start: null }),
+    sec({ section_title: 'Delta', is_leaf: true, char_start: null }),
+  ];
+  const r = resolveAnchorMap(md, secs, { trustBE: true });
+  assert.equal(r.fellBack, true, 'candidate char_start NULL 이면 폴백해야 함(BE-first not BE-only)');
+  // string-match must yield real jumps (non-zero count)
+  assert.ok(r.matched >= 1, 'md-aligned doc 는 폴백 string-match 로 jump 비-0');
+});
+
+test('char_start > splicedText.length → 그 anchor 만 비활성, 폴백 안 함', () => {
+  const secs = [
+    sec({ section_title: 'A', char_start: 3, is_leaf: true }),
+    sec({ section_title: 'B', char_start: 100000, is_leaf: true }), // out of range (truncated tail)
+  ];
+  const short = 'hello world';
+  const r = resolveAnchorMap(short, secs, { trustBE: true });
+  assert.equal(r.fellBack, false, '범위 초과는 폴백 트리거 아님(candidate char_start NOT NULL)');
+  assert.equal(r.anchors[secs[0].chunk_id], 3);
+  assert.equal(r.anchors[secs[1].chunk_id], undefined, '초과 anchor 는 비활성');
+});
+
+test('preamble(title 없음, is_leaf) char_start NULL 은 candidate 아님 → 폴백 유발 X', () => {
+  const secs = [
+    sec({ section_title: null, heading_path: null, is_leaf: true, char_start: null }), // preamble
+    sec({ section_title: 'Real', is_leaf: true, char_start: 7 }), // the only jump-target candidate
+  ];
+  const r = resolveAnchorMap(LONG, secs, { trustBE: true });
+  assert.equal(isJumpTargetCandidate(secs[0]), false, 'preamble 은 candidate 아님');
+  assert.equal(r.fellBack, false);
+  assert.equal(r.anchors[secs[1].chunk_id], 7);
+});
@@ -0,0 +1,82 @@
+// 개요(절 목차) → 본문 점프 anchor 산출 공유 헬퍼 (경로 B: BE char_start primary + string-match 폴백).
+//
+// render-site 가 md_content 를 splice 할 때(trustBE=true)는 BE 가 builder 단계에서 박은 char_start 를
+// 1순위로 쓰고, 비-md basis(3-pane extracted_text 등, trustBE=false)는 무조건 string-match(buildAnchorMap)로
+// 폴백한다. char_start 가 비어 있으면(non-PASS doc, 또는 multi-night 재처리 중 아직 미백필 PASS doc) BE-only
+// 가 아니라 string-match 로 graceful degrade 한다(B4: BE-first, NOT BE-only).
+//
+// ★ NEW-5 (must-not-miss): 폴백 트리거는 JUMP-TARGET-CANDIDATE 한정이다.
+//   window-child(node_type='window')와 preamble(title 없음)은 char_start=NULL **BY DESIGN**(g2).
+//   트리거가 'NULL char_start 가 하나라도 있으면 whole-doc 폴백' 이면, window-child 를 항상 보유한 windowed
+//   doc 은 매번 폴백 → split-parent char_start(windowed 절의 단일 jump target)를 영영 안 쓰고 →
+//   buildAnchorMap 은 split-parent 를 skip → windowed 코어 절이 영원히 점프 안 됨 = 이 플랜이 겨냥한
+//   바로 그 절에서 Path A 0% 회귀. 따라서 트리거 분모 = jump-target-candidate 뿐.
+
+import { buildAnchorMap } from './outlineAnchors.ts';
+import { cleanHeading, type DocumentSection } from './headingPath.ts';
+
+export interface ResolveResult {
+  /** chunk_id → char offset inside splicedText (UTF-16). */
+  anchors: Record<number, number>;
+  /** Number of jump-target candidates (BE path) or buildAnchorMap.total (fallback). */
+  total: number;
+  /** Number of anchors actually assigned. */
+  matched: number;
+  /** Whether the result came from the string-match fallback (buildAnchorMap) — for V-rail / verification. */
+  fellBack: boolean;
+}
+
+/** True when the section has a displayable title, or a non-empty last heading_path segment. */
+function hasTitle(s: DocumentSection): boolean {
+  const segments = (s.heading_path || '').split('>');
+  const tail = segments[segments.length - 1] || '';
+  return Boolean(cleanHeading(s.section_title) || cleanHeading(tail));
+}
+
+/**
+ * Jump-target candidate = a titled section that must receive char_start: a non-window leaf, or a %_split parent.
+ * window-child (node_type='window') and preamble (no title) have NULL char_start by design → not candidates (NEW-5).
+ */
+export function isJumpTargetCandidate(s: DocumentSection): boolean {
+  const isSplitParent = !!s.node_type?.endsWith('_split');
+  const isPlainLeaf = s.is_leaf && s.node_type !== 'window';
+  return (isPlainLeaf || isSplitParent) && hasTitle(s);
+}
+
+/** Resolve outline → body jump anchors: BE char_start first, string-match (buildAnchorMap) as the fallback. */
+export function resolveAnchorMap(
+  splicedText: string | null | undefined,
+  sections: DocumentSection[] | null | undefined,
+  opts: { trustBE: boolean },
+): ResolveResult {
+  const list = sections ?? [];
+  // Shared fallback path: string-match through buildAnchorMap, reported with fellBack=true.
+  const viaStringMatch = (): ResolveResult => ({ ...buildAnchorMap(splicedText, list), fellBack: true });
+
+  // Basis mismatch (e.g. 3-pane extracted_text) → always string-match.
+  if (!opts.trustBE) return viaStringMatch();
+
+  // [B4 + NEW-5] BE-first: fall back when no jump-target candidate exists, or when any candidate
+  //   has a NULL char_start. window-child / preamble NULLs are not candidates, so they never
+  //   enter the trigger.
+  const targets = list.filter((s) => isJumpTargetCandidate(s));
+  const allOffsetsPresent = targets.every((s) => s.char_start != null);
+  if (targets.length === 0 || !allOffsetsPresent) return viaStringMatch();
+
+  // Adopt BE char_start (C1: window / null / untitled excluded = identical to the candidate set).
+  const maxOffset = (splicedText ?? '').length;
+  const anchors: Record<number, number> = {};
+  let matched = 0;
+  for (const target of targets) {
+    const offset = target.char_start as number;
+    // Guard: char_start <= splicedText.length (MarkdownDoc.svelte:58). Beyond that = FE
+    // serve-truncated tail → only that anchor is dropped (no fallback — string-match cannot
+    // find a truncated tail either).
+    const usable = Number.isFinite(offset) && offset >= 0 && offset <= maxOffset;
+    if (!usable) continue;
+    anchors[target.chunk_id] = offset;
+    matched += 1;
+  }
+
+  return { anchors, total: targets.length, matched, fellBack: false };
+}
@@ -3,13 +3,16 @@
  import { browser } from '$app/environment';
  import { page } from '$app/stores';
  import { goto } from '$app/navigation';
-  import { Menu, EllipsisVertical, ChevronDown, FileText, Newspaper, HelpCircle, StickyNote, Inbox, PanelLeft } from 'lucide-svelte';
+  import { Menu, EllipsisVertical, ChevronDown, FileText, Newspaper, HelpCircle, StickyNote, Inbox, PanelLeft, MessageCircle } from 'lucide-svelte';
  import { isAuthenticated, user, tryRefresh, logout } from '$lib/stores/auth';
  import { toasts, removeToast } from '$lib/stores/toast';
  import { refresh as refreshPublicConfig } from '$lib/stores/config';
  import { ui } from '$lib/stores/uiState.svelte';
+  import { queueOverview } from '$lib/stores/queueOverview';
+  import { MACHINE_STATE_LABEL, machineChipClass } from '$lib/utils/queueDisplay';
  import Sidebar from '$lib/components/Sidebar.svelte';
  import SystemStatusDot from '$lib/components/SystemStatusDot.svelte';
+  import QueueDrawer from '$lib/components/QueueDrawer.svelte';
  import QuickMemoButton from '$lib/components/QuickMemoButton.svelte';
  import IconButton from '$lib/components/ui/IconButton.svelte';
  import Drawer from '$lib/components/ui/Drawer.svelte';
@@ -65,6 +68,15 @@
  let showChrome = $derived($isAuthenticated && !NO_CHROME_PATHS.some(p => $page.url.pathname.startsWith(p)));
  let showSidebar = $derived(showChrome && !NO_SIDEBAR_PATHS.some(p => $page.url.pathname.startsWith(p)));

+  // 처리 현황 스트립 (안6 라이트) — 60s 폴링 store 공유. fetch 실패/401 시
+  // store 가 null → 스트립 자체를 숨김 (silent 비차단, 로그인 페이지 동일).
+  let queue = $derived($queueOverview);
+  let queueMacbook = $derived(queue?.machines?.find((m) => m.key === 'macbook') ?? null);
+  function toggleQueueDrawer() {
+    if (ui.isDrawerOpen('queue')) ui.closeDrawer();
+    else ui.openDrawer('queue');
+  }
+
  function handleKeydown(e) {
    if (e.key === '/' && !['INPUT', 'TEXTAREA'].includes(document.activeElement?.tagName)) {
      e.preventDefault();
@@ -140,6 +152,7 @@
          </div>

          <a href="/ask" class="px-3 py-1.5 rounded-md text-sm font-semibold transition-colors {isActive('/ask') ? 'text-accent bg-accent/12' : 'text-dim hover:text-text hover:bg-surface'}">질문</a>
+          <a href="/chat" class="px-3 py-1.5 rounded-md text-sm font-semibold transition-colors {isActive('/chat') ? 'text-accent bg-accent/12' : 'text-dim hover:text-text hover:bg-surface'}">이드</a>
          <SystemStatusDot />
        </div>

@@ -161,6 +174,28 @@
        </div>
      </nav>

+      <!-- 전 페이지 상태 스트립 (안6 라이트) — 클릭 시 우측 처리 현황 드로어 토글 -->
+      {#if queue}
+        <button
+          type="button"
+          onclick={toggleQueueDrawer}
+          aria-expanded={ui.isDrawerOpen('queue')}
+          aria-label="처리 현황 자세히 보기"
+          class="flex items-center gap-3 px-4 py-1.5 border-b border-default bg-surface text-[11px] text-dim shrink-0 text-left hover:bg-surface-hover transition-colors overflow-x-auto"
+        >
+          <span class="flex items-center gap-1.5 shrink-0">
+            <span class="w-2 h-2 rounded-full {queue.totals.processing > 0 ? 'bg-success' : 'bg-faint'}"></span>
+            <strong class="text-text font-semibold tabular-nums">처리 중 {queue.totals.processing.toLocaleString()}</strong>
+          </span>
+          <span class="tabular-nums shrink-0">대기 <strong class="text-text">{queue.totals.pending.toLocaleString()}</strong></span>
+          <span class="tabular-nums shrink-0 {queue.totals.failed > 0 ? 'text-error font-semibold' : ''}">실패 <strong class={queue.totals.failed > 0 ? '' : 'text-text'}>{queue.totals.failed.toLocaleString()}</strong></span>
+          {#if queueMacbook}
+            <span class="text-[10px] font-bold rounded-full px-2 py-0.5 shrink-0 {machineChipClass(queueMacbook.state)}">맥북 {MACHINE_STATE_LABEL[queueMacbook.state]}</span>
+          {/if}
+          <span class="ml-auto flex items-center gap-0.5 text-faint shrink-0">자세히 <ChevronDown size={11} /></span>
+        </button>
+      {/if}
+
      <!-- 메인: 데스크탑 상시 사이드바 + 콘텐츠 -->
      <div class="flex-1 min-h-0 flex">
        {#if showSidebar}
@@ -178,6 +213,7 @@
        <a href="/documents" aria-current={docsActive ? 'page' : undefined} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold transition-colors {docsActive ? 'text-accent' : 'text-dim'}"><FileText size={18} strokeWidth={1.9} /> 문서</a>
        <a href="/news" aria-current={newsActive ? 'page' : undefined} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold transition-colors {newsActive ? 'text-accent' : 'text-dim'}"><Newspaper size={18} strokeWidth={1.9} /> 뉴스</a>
        <a href="/ask" aria-current={isActive('/ask') ? 'page' : undefined} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold transition-colors {isActive('/ask') ? 'text-accent' : 'text-dim'}"><HelpCircle size={18} strokeWidth={1.9} /> 질문</a>
+        <a href="/chat" aria-current={isActive('/chat') ? 'page' : undefined} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold transition-colors {isActive('/chat') ? 'text-accent' : 'text-dim'}"><MessageCircle size={18} strokeWidth={1.9} /> 이드</a>
        <a href="/memos" aria-current={isActive('/memos') ? 'page' : undefined} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold transition-colors {isActive('/memos') ? 'text-accent' : 'text-dim'}"><StickyNote size={18} strokeWidth={1.9} /> 메모</a>
        <button onclick={() => ui.openDrawer('sidebar')} class="flex-1 flex flex-col items-center justify-center gap-1 py-2 text-[10px] font-semibold text-dim"><Menu size={18} strokeWidth={1.9} /> 더보기</button>
      </nav>
@@ -189,6 +225,9 @@
        </Drawer>
      </div>

+      <!-- 처리 현황 드로어 (안6 라이트, 스트립 클릭 시 우측) -->
+      <QueueDrawer />
+
      <!-- 빠른 메모 FAB -->
      <QuickMemoButton />
    </div>
@@ -13,10 +13,13 @@
  import { domainBgClass, domainLabel } from '$lib/utils/domainSlug';
  import { user } from '$lib/stores/auth';
  import { api } from '$lib/api';
+  import { queueOverview, refreshQueueOverview } from '$lib/stores/queueOverview';
+  import ProcessingFlowBoard from '$lib/components/ProcessingFlowBoard.svelte';
+  import type { QueueOverview } from '$lib/types/queue';
  import EmptyState from '$lib/components/ui/EmptyState.svelte';
  import Skeleton from '$lib/components/ui/Skeleton.svelte';
  import {
-    Scale, FileText, Pin, ChevronRight, GraduationCap, Upload, Newspaper,
+    Scale, FileText, Pin, GraduationCap, Upload, Newspaper,
  } from 'lucide-svelte';
  import { addToast } from '$lib/stores/toast';

@@ -125,6 +128,17 @@
    preview: '미리보기', thumbnail: '썸네일',
  };

+  // ─── Processing machine board (proposal 2) + ETA (proposal 5 light) — GET /api/queue/overview ───
+  // Home polls every 30 s (on top of the store's default 60 s — in-flight coalescing keeps duplicate calls at 0).
+  // If the backend is not deployed or fails, store=null → the board itself is silently omitted (silent, non-blocking).
+  let queue = $derived<QueueOverview | null>($queueOverview);
+
+  onMount(() => {
+    void refreshQueueOverview();
+    const handle = setInterval(() => void refreshQueueOverview(), 30_000);
+    return () => clearInterval(handle);
+  });
+
  interface PipelineRow {
    stage: string; label: string;
    pending: number; processing: number; failed: number; total: number;
@@ -166,22 +180,10 @@
  let pipelineRows = $derived(
    summary ? buildPipelineRows(summary.pipeline_status, summary.queue_lag ?? []) : []
  );
-  let pipelineMax = $derived(Math.max(1, ...pipelineRows.map((r) => r.total)));
  let totalFailed = $derived(summary?.failed_count ?? 0);
  let totalPending = $derived(pipelineRows.reduce((s, r) => s + r.pending, 0));
  let totalProcessing = $derived(pipelineRows.reduce((s, r) => s + r.processing, 0));

-  let pipelineManualClosed = $state(false);
-  let pipelineOpen = $derived(pipelineManualClosed ? false : totalFailed > 0);
-
-  function formatAge(sec: number | null): string {
-    if (sec == null || sec <= 0) return '';
-    if (sec < 60) return `${sec}초 전`;
-    if (sec < 3600) return `${Math.floor(sec / 60)}분 전`;
-    if (sec < 86400) return `${Math.floor(sec / 3600)}시간 전`;
-    return `${Math.floor(sec / 86400)}일 전`;
-  }
-
  function formatTime(dateStr: string) {
    const d = new Date(dateStr);
    if (isNaN(d.getTime())) return ''; // 빈 문자열/유효하지 않은 created_at → 'Invalid Date' 회피
@@ -420,56 +422,10 @@
        </div>
      </div>

-      <!-- ═══ 파이프라인 상세 (실패 있을 때 자동 펼침) ═══ -->
-      <details
-        class="mt-5"
-        open={pipelineOpen}
-        ontoggle={(e) => { if (!e.currentTarget.open) pipelineManualClosed = true; }}
-      >
-        <summary class="flex items-center justify-between px-5 py-3.5 bg-surface border border-default rounded-card cursor-pointer hover:bg-surface-hover transition-colors select-none list-none">
-          <span class="text-sm font-semibold text-text flex items-center gap-2">
-            <ChevronRight size={14} class="transition-transform details-chevron" />
-            파이프라인 상세
-          </span>
-          <span class="text-xs text-dim flex items-center gap-2.5">
-            {#if totalFailed > 0}<span class="text-error font-medium">실패 {totalFailed}</span>{/if}
-            {#if totalPending > 0}<span>대기 {totalPending}</span>{/if}
-            {#if totalFailed === 0 && totalPending === 0}<span>처리 완료</span>{/if}
-          </span>
-        </summary>
-
-        <div class="mt-2 px-5 py-4 bg-surface border border-default rounded-card">
-          <p class="text-xs text-dim mb-3">최근 24시간</p>
-          {#if pipelineRows.length > 0}
-            <div class="space-y-3">
-              {#each pipelineRows as row (row.stage)}
-                <div>
-                  <div class="flex items-center justify-between text-xs mb-1.5">
-                    <span class="text-dim">
-                      {row.label}
-                      {#if row.oldestPendingAgeSec && row.oldestPendingAgeSec > 600}
-                        <span class="ml-1 text-warning" title="가장 오래된 pending 의 경과 시간">({formatAge(row.oldestPendingAgeSec)})</span>
-                      {/if}
-                    </span>
-                    <span class="text-dim tabular-nums">
-                      대기 <span class="text-text">{row.pending}</span> ·
-                      처리 <span class="text-text">{row.processing}</span> ·
-                      실패 <span class={row.failed > 0 ? 'text-error font-medium' : ''}>{row.failed}</span>
-                    </span>
-                  </div>
-                  <div class="flex h-1.5 w-full overflow-hidden rounded-sm bg-bg">
-                    {#if row.pending > 0}<div class="bg-warning h-full" style="width: {(row.pending / pipelineMax) * 100}%"></div>{/if}
-                    {#if row.processing > 0}<div class="bg-accent h-full" style="width: {(row.processing / pipelineMax) * 100}%"></div>{/if}
-                    {#if row.failed > 0}<div class="bg-error h-full" style="width: {(row.failed / pipelineMax) * 100}%"></div>{/if}
-                  </div>
-                </div>
-              {/each}
-            </div>
-          {:else}
-            <p class="text-xs text-dim text-center py-3">처리 작업 없음</p>
-          {/if}
-        </div>
-      </details>
+      <!-- ═══ 처리 머신 보드 v2 — 파이프라인 흐름 + 상세 패널 + 실패 드로어 (ds-board-engines-1) ═══ -->
+      {#if queue}
+        <ProcessingFlowBoard overview={queue} />
+      {/if}

    {/if}
  </div>
@@ -482,7 +438,3 @@
  </div>
 {/snippet}

-<style>
-  details[open] :global(.details-chevron) { transform: rotate(90deg); }
-  details summary::-webkit-details-marker { display: none; }
-</style>
@@ -0,0 +1,751 @@
+<!--
+  /chat — 이드 채팅 표면 (PR-Eid-Chat).
+
+  표면 문법: 페이지 정체성 = "이드". 모델명·머신명·alias 비노출
+  (persona model-agnostic 원칙 — 프로토콜 레이어도 동일: SSE payload 의
+  model 필드는 서버에서 mode 값으로 치환되고 usage 는 제거됨).
+  클라이언트는 mode('daily'|'deep') 만 보내고 alias 매핑은 서버(/api/eid/chat) 책임.
+
+  - 모드: 일상(daily) / 심층(deep) segmented 토글. 심층 = 장문·무거운 질문,
+    잠들어 있으면 자동 기동(처음 최대 ~1분) — 기계중립 표현만 사용.
+  - 스트리밍: POST /api/eid/chat → SSE. api<T>() 는 JSON 전용이라 raw fetch
+    (apiFetchRaw, 토큰 첨부 + 401 refresh 1회 공유) 사용. 라인 버퍼로 청크
+    경계 분리, "data:" 라인만, [DONE] 종료, choices[0].delta.content 누적
+    (fixture 2종 — 26B tool_calls 배열 / 27B reasoning·logprobs null — 모두
+    content 만 읽으면 동일 처리).
+  - 에러: error_reason 매핑 (warming / editor_busy / upstream_cold /
+    macbook_unavailable / substrate_degraded / 기타 detail). 자동 fallback
+    금지 — 다른 모드로 자동 전환하지 않는다. 스트림 도중 중단 = 받은 부분
+    유지 + 표시.
+  - 대기 표시(첫 바이트 전): 경과 타이머 1초 갱신 + 3초 후 GET /api/eid/status
+    1회·이후 8초 간격 재조회(실패는 조용히 무시 — 기능 비차단)로 "대기"와
+    "고장"을 정직하게 구분. daily.busy=true 면 줄 서는 중 안내. 15초 경과 +
+    daily 모드면 [심층으로 전환]/[취소] 버튼 노출 — 전환은 명시 클릭만
+    (자동 fallback 금지 정책 위반 아님). 첫 바이트 도착/스트림 종료 시
+    타이머·폴링 즉시 정리.
+  - 이력: localStorage `eid_chat:v1` (키 상수는 $lib/eidChat — logout 시 제거와 공유).
+    전송 payload 는 마지막 20턴(40 messages) cap.
+  - 입력 한도: 메시지당 8,000자 클라 선차단(서버 422 검증과 동일 한도).
+    422 수신 시 detail 을 한 줄로 정규화 + 방금 push 한 user 턴 pop 으로
+    payload 오염 고리 차단.
+-->
+<script lang="ts">
+  import { onMount, onDestroy } from 'svelte';
+  import { api, apiFetchRaw } from '$lib/api';
+  import { EID_CHAT_STORAGE_KEY } from '$lib/eidChat';
+  import Button from '$lib/components/ui/Button.svelte';
+  import EmptyState from '$lib/components/ui/EmptyState.svelte';
+  import EidEvidenceCard from '$lib/components/eid/EidEvidenceCard.svelte';
+  import { MessageCircle, SendHorizontal, RotateCcw, AlertCircle } from 'lucide-svelte';
+
+  type ChatMode = 'daily' | 'deep'; // response mode; sent to the server as payload.mode
+  // deep (search) answers carry sources (evidence) and a partial flag; daily answers carry neither.
+  type EidSource = { id?: number; doc_id?: number; title?: string; score?: number };
+  type ChatMessage = {
+    role: 'user' | 'assistant';
+    content: string;
+    sources?: EidSource[];
+    partial?: boolean;
+  };
+  type Notice = { kind: 'warn' | 'error'; message: string; retryable: boolean }; // notice card state; retryable gates the retry button
+  // GET /api/eid/status response — narrowed to the fields needed to read the busy signal while waiting
+  type EidStatus = { daily?: { busy?: boolean } };
+
+  // History key — one constant shared with the history removal done on logout (stores/auth.ts)
+  const STORAGE_KEY = EID_CHAT_STORAGE_KEY;
+  // Request payload cap: the last 20 turns (40 messages)
+  const MAX_PAYLOAD_MESSAGES = 40;
+  // localStorage retention cap (separate from the payload cap — history kept for on-screen display)
+  const MAX_STORED_MESSAGES = 200;
+  // Per-message input limit — same limit as the server's (eid_chat.py) 422 validation, blocked client-side first
+  const MAX_MESSAGE_CHARS = 8000;
+  // Length at which the near-limit counter starts to show
+  const COUNTER_THRESHOLD = 7500;
+  // Wait display (before the first byte): when status polling starts (s) / re-poll interval (s) / when action buttons appear (s)
+  const STATUS_POLL_START_SEC = 3;
+  const STATUS_POLL_INTERVAL_SEC = 8;
+  const WAIT_ACTIONS_SEC = 15;
+
+  const DEEP_CAPTION =
+    '장문·무거운 질문에 적합 — 잠들어 있으면 자동 기동 (처음 응답까지 최대 ~1분)';
+
+  // Preset chips: fill the input box with a prefix
+  const PRESETS: Array<{ label: string; prefix: string }> = [
+    { label: '번역 한→영', prefix: '다음을 영어로 번역해줘.\n\n' },
+    { label: '번역 영→한', prefix: '다음을 한국어로 번역해줘.\n\n' },
+    { label: '요약', prefix: '다음 내용을 핵심만 간결히 요약해줘.\n\n' },
+    { label: '글 다듬기', prefix: '다음 글을 뜻은 유지하면서 자연스럽게 다듬어줘.\n\n' },
+  ];
+
+  // ── state ───────────────────────────────────────────
+  let mode = $state<ChatMode>('daily');
+  let messages = $state<ChatMessage[]>([]);
+  let input = $state('');
+  let streaming = $state(false);
+  let streamingText = $state('');
+  let notice = $state<Notice | null>(null);
+  // Pre-first-byte phase in deep (search) mode — 'searching' switches the wait display to "searching for evidence"
+  let deepPhase = $state<'searching' | null>(null);
+
+  let scrollEl: HTMLDivElement | undefined = $state();
+  let textareaEl: HTMLTextAreaElement | undefined = $state();
+  let abortCtrl: AbortController | null = null;
+
+  // ── Wait tracking (before the first byte) ────────────────────────
+  // Elapsed seconds + whether the daily engine is busy (null = unknown). A token (generation
+  // counter) marks per-stream ownership — on the abort-then-immediately-resend (switch to deep)
+  // path, a late cleanup from the previous stream must not kill the new stream's timer.
+  let waitSeconds = $state(0);
+  let dailyBusy = $state<boolean | null>(null);
+  let waitIntervalId: ReturnType<typeof setInterval> | null = null;
+  let waitTokenSeq = 0;
+  let waitToken = 0; // currently active tracking token (0 = no tracking)
+
+  function startWaitTracking(streamMode: ChatMode): number { // returns the token that owns this tracking run
+    // Clear leftovers from a previous tracking run (guards paths where start arrives before stop, such as a switch resend)
+    if (waitIntervalId !== null) {
+      clearInterval(waitIntervalId);
+      waitIntervalId = null;
+    }
+    const token = ++waitTokenSeq;
+    waitToken = token;
+    waitSeconds = 0;
+    dailyBusy = null;
+    waitIntervalId = setInterval(() => {
+      if (waitToken !== token) return; // guard against a missed cleanup — harmless no-op
+      waitSeconds += 1;
+      // Busy-signal polling: once at 3 s elapsed, then every 8 s (3, 11, 19, ...).
+      // daily mode only — a deep wait shows just the existing wake notice + elapsed timer.
+      if (
+        streamMode === 'daily' &&
+        waitSeconds >= STATUS_POLL_START_SEC &&
+        (waitSeconds - STATUS_POLL_START_SEC) % STATUS_POLL_INTERVAL_SEC === 0
+      ) {
+        void pollEidStatus(token);
+      }
+    }, 1000);
+    return token;
+  }
+
+  // Token guard: clean up only the tracking this caller owns — no-op if another stream has taken over
+  function stopWaitTracking(token: number) {
+    if (token !== waitToken) return;
+    waitToken = 0;
+    if (waitIntervalId !== null) {
+      clearInterval(waitIntervalId);
+      waitIntervalId = null;
+    }
+    waitSeconds = 0;
+    dailyBusy = null;
+  }
+
+  // Status lookup — failures are silently ignored (the wait display keeps running on the timer alone; never blocks the feature)
+  async function pollEidStatus(token: number) {
+    try {
+      const status = await api<EidStatus>('/eid/status');
+      if (token !== waitToken) return; // discard a late response that arrives after the stream ended or was replaced
+      dailyBusy = status?.daily?.busy === true;
+    } catch {
+      // Ignored — the busy signal is supplementary information and must not block chat
+    }
+  }
+
+  // ── localStorage history ───────────────────────────────
+  function persist() {
+    if (typeof window === 'undefined') return;
+    try {
+      const trimmed = messages.slice(-MAX_STORED_MESSAGES);
+      window.localStorage.setItem(STORAGE_KEY, JSON.stringify({ mode, messages: trimmed }));
+    } catch {
+      // Quota exceeded etc. — failing to save history is not fatal
+    }
+  }
+
+  function restore() { // load mode + messages from localStorage, keeping only well-formed entries
+    if (typeof window === 'undefined') return;
+    try {
+      const raw = window.localStorage.getItem(STORAGE_KEY);
+      if (!raw) return;
+      const parsed = JSON.parse(raw) as { mode?: unknown; messages?: unknown };
+      if (parsed.mode === 'daily' || parsed.mode === 'deep') mode = parsed.mode;
+      if (Array.isArray(parsed.messages)) {
+        messages = parsed.messages
+          .filter(
+            (m): m is ChatMessage =>
+              !!m &&
+              typeof m === 'object' &&
+              ((m as ChatMessage).role === 'user' || (m as ChatMessage).role === 'assistant') &&
+              typeof (m as ChatMessage).content === 'string'
+          )
+          // Array-size guard + clamp content to 8,000 chars — normalize at restore time so that
+          // history damaged or bloated from outside does not contaminate the request payload.
+          // sources/partial (deep answer evidence) are kept — they never enter the payload (the runStream map sends only role/content).
+          .slice(-MAX_STORED_MESSAGES)
+          .map((m) => ({
+            role: m.role,
+            content: m.content.slice(0, MAX_MESSAGE_CHARS),
+            sources: Array.isArray((m as ChatMessage).sources) ? (m as ChatMessage).sources : undefined,
+            partial: (m as ChatMessage).partial === true || undefined,
+          }));
+      }
+    } catch {
+      // Ignore corrupted history (start with a fresh conversation)
+    }
+  }
+
+  onMount(() => restore());
+  onDestroy(() => {
+    abortCtrl?.abort();
+    // Clear the wait timer/polling when leaving the page (harmless even if it doubles with the abort's finally)
+    if (waitIntervalId !== null) clearInterval(waitIntervalId);
+  });
+
+  // ── Auto-scroll (pin to the bottom on each new message / stream chunk) ──
+  $effect(() => {
+    void messages.length;
+    void streamingText;
+    if (scrollEl) scrollEl.scrollTop = scrollEl.scrollHeight;
+  });
+
+  // ── Input textarea auto-grow ─────────────────────────
+  $effect(() => {
+    void input;
+    if (!textareaEl) return;
+    textareaEl.style.height = 'auto';
+    textareaEl.style.height = Math.min(textareaEl.scrollHeight, 160) + 'px';
+  });
+
+  function applyPreset(prefix: string) { // preset chip handler: put the preset prefix in front of the input
+    if (!input.startsWith(prefix)) input = prefix + input; // skip when the input already starts with this prefix
+    textareaEl?.focus();
+  }
+
+  function newConversation() { // abort any in-flight stream, clear history and UI state, save the empty history
+    abortCtrl?.abort();
+    messages = [];
+    notice = null;
+    streamingText = '';
+    streaming = false;
+    persist();
+    textareaEl?.focus();
+  }
+
+  // ── error_reason → notice message mapping ──────────────────
+  // No automatic fallback ([[feedback_no_silent_fallback_explicit_opt_in]]):
+  // whatever the reason, never switch to another mode automatically — only show it explicitly.
+  function mapErrorReason(reason: string | undefined, detail: string): Notice {
+    switch (reason) {
+      case 'warming':
+        return {
+          kind: 'warn',
+          message: '심층 엔진 기동 중입니다 — 잠시 후 다시 시도하세요.',
+          retryable: true,
+        };
+      case 'editor_busy':
+        return {
+          kind: 'warn',
+          message: '편집 작업 보호로 잠시 사용할 수 없습니다.',
+          retryable: false,
+        };
+      case 'upstream_cold':
+      case 'macbook_unavailable':
+        return {
+          kind: 'warn',
+          message: '심층 엔진이 잠들어 있습니다 — 다시 시도하면 기동을 시작합니다.',
+          retryable: true,
+        };
+      case 'substrate_degraded':
+        return {
+          kind: 'error',
+          message: '운영 규칙이 적재되지 않았습니다 — 관리자 확인이 필요합니다.',
+          retryable: false,
+        };
+      default:
+        return { kind: 'error', message: detail || '응답 생성에 실패했습니다.', retryable: true };
+    }
+  }
+
+  // Parse a non-200 response body: {detail, error_reason} — detail may be a string or a
+  // {message} object (a reduced version of the normalization rule in api.ts parseDetail).
+  async function parseErrorBody(res: Response): Promise<Notice> {
+    const body = (await res.json().catch(() => null)) as
+      | { detail?: unknown; error_reason?: unknown }
+      | null;
+    const reason = typeof body?.error_reason === 'string' ? body.error_reason : undefined;
+    let detail = '';
+    if (typeof body?.detail === 'string') detail = body.detail;
+    else if (body?.detail && typeof body.detail === 'object') {
+      const obj = body.detail as { message?: string; error_reason?: string };
+      detail = obj.message || '';
+      // Also accept an error_reason nested inside the detail object
+      return mapErrorReason(reason ?? obj.error_reason, detail || res.statusText);
+    }
+    return mapErrorReason(reason, detail || res.statusText);
+  }
+
+  // 422: normalize FastAPI validation detail (array shape — [{loc, msg, type}, ...]) into
+  // a single human-readable line. Length-limit violations (8,000 chars per message / total cap)
+  // are replaced with a friendly message. The pydantic v2 "Value error, " prefix is stripped.
+  function normalizeValidationDetail(detail: unknown): string {
+    const first = (Array.isArray(detail) ? detail[0] : undefined) as
+      | { msg?: unknown }
+      | undefined;
+    const msg =
+      typeof first?.msg === 'string' ? first.msg.replace(/^Value error,\s*/i, '') : '';
+    if (/at most|too.?long|초과|깁니다/i.test(msg)) {
+      return '입력이 너무 깁니다 — 메시지는 8,000자 이내로 줄이거나, 대화가 길면 새 대화로 시작하세요.';
+    }
+    if (msg) return `요청 형식 오류: ${msg}`;
+    return '요청 형식이 올바르지 않습니다 — 입력을 줄이거나 새 대화로 시작하세요.';
+  }
+
+  // ── Send / retry ───────────────────────────────────
+  function sendMessage() {
+    const text = input.trim();
+    if (!text || streaming) return;
+    // Client-side pre-block at 8,000 chars per message — stops an over-limit payload before a 422
+    // (the counter under the input bar always shows the same notice inline)
+    if (text.length > MAX_MESSAGE_CHARS) {
+      notice = {
+        kind: 'error',
+        message: '입력이 너무 깁니다 — 8,000자 이내로 줄여주세요.',
+        retryable: false,
+      };
+      return;
+    }
+    messages.push({ role: 'user', content: text });
+    input = '';
+    persist();
+    void runStream();
+  }
+
+  // Retry: resend the user message at the end of history as is (no duplicate user turn is added)
+  function retry() {
+    if (streaming) return;
+    if (messages.length === 0 || messages[messages.length - 1].role !== 'user') return;
+    void runStream();
+  }
+
+  // ── Action buttons while waiting (daily + 15 s elapsed) ────────────
+  // [Switch to deep] — a mode switch by explicit click (not a violation of the no-automatic-fallback
+  // policy). Aborts the current fetch → immediately resends the same user turn with mode=deep.
+  // The aborted previous stream's finally leaves the new stream's state alone thanks to the
+  // abortCtrl comparison + wait-token guard (same structure as the new-conversation abort race guard).
+  function switchToDeep() {
+    if (!streaming || mode !== 'daily') return;
+    mode = 'deep'; // also update the mode toggle state to deep
+    abortCtrl?.abort();
+    void runStream();
+  }
+
+  // [Cancel] — abort, then pop the user turn just pushed + restore its text into the input box
+  // (same pattern as the 422 handling: prevents history contamination + re-saves localStorage).
+  // Removing the placeholder is handled by the aborted stream's finally (streaming=false).
+  function cancelWait() {
+    if (!streaming) return;
+    abortCtrl?.abort();
+    if (messages.length > 0 && messages[messages.length - 1].role === 'user') {
+      const popped = messages.pop();
+      if (popped && !input) input = popped.content;
+      persist();
+    }
+  }
+
+  async function runStream() { // POST the capped history to /eid/chat and consume the SSE reply
+    notice = null;
+    streaming = true;
+    streamingText = '';
+    const ctrl = new AbortController();
+    abortCtrl = ctrl;
+    // Start wait tracking before the first byte — cleanup timing is controlled by this stream's own token
+    const waitTok = startWaitTracking(mode);
+
+    const payload = {
+      mode,
+      messages: messages
+        .slice(-MAX_PAYLOAD_MESSAGES)
+        .map((m) => ({ role: m.role, content: m.content })),
+    };
+
+    let acc = '';
+    let sawDone = false;
+    // Data accompanying a deep (search) answer — never sent for daily
+    let accSources: EidSource[] = [];
+    let accPartial = false;
+
+    try {
+      const res = await apiFetchRaw('/eid/chat', {
+        method: 'POST',
+        body: JSON.stringify(payload),
+        signal: ctrl.signal,
+      });
+
+      if (!res.ok) {
+        if (res.status === 422) {
+          // Validation rejection — normalize detail + pop the user turn just pushed.
+          // If an over-limit turn stays in history, every later request payload keeps
+          // hitting 422 (a contamination loop), so cut it here (re-save localStorage).
+          const body = (await res.json().catch(() => null)) as { detail?: unknown } | null;
+          notice = {
+            kind: 'error',
+            message: normalizeValidationDetail(body?.detail),
+            retryable: false,
+          };
+          if (messages.length > 0 && messages[messages.length - 1].role === 'user') {
+            const popped = messages.pop();
+            // If the input box is empty, put the text back so it can be shortened and resent
+            if (popped && !input) input = popped.content;
+            persist();
+          }
+          return;
+        }
+        notice = await parseErrorBody(res);
+        return;
+      }
+      if (!res.body) {
+        notice = { kind: 'error', message: '스트림을 열 수 없습니다.', retryable: true };
+        return;
+      }
+
+      // SSE line-buffer parsing — a line can be cut at a chunk boundary, so the
+      // last incomplete line stays in buf and is joined with the next chunk.
+      const reader = res.body.getReader();
+      const decoder = new TextDecoder();
+      let buf = '';
+
+      // Handle one data: line — returns true on [DONE] (acc/streamingText accumulate via closure)
+      const processLine = (rawLine: string): boolean => {
+        const line = rawLine.trim();
+        if (!line.startsWith('data:')) return false;
+        const data = line.slice(5).trim();
+        if (data === '[DONE]') return true;
+        try {
+          const obj = JSON.parse(data) as {
+            choices?: Array<{ delta?: { content?: unknown } }>;
+            phase?: string;
+            error_reason?: string;
+            eid_sources?: EidSource[];
+            partial?: boolean;
+          };
+          // deep (search) envelope branches — absent from daily responses
+          if (obj?.phase === 'ping') return false; // heartbeat — ignore
+          if (obj?.phase === 'searching') {
+            deepPhase = 'searching'; // switch the wait display to "searching for evidence"
+            return false;
+          }
+          if (obj?.phase === 'error') {
+            // In-stream unavailability/failure — keep what was received + show it explicitly (zero automatic fallback).
+            // The [DONE] that follows sets sawDone, so there is no false "interrupted" alarm.
+            notice = mapErrorReason(obj.error_reason, '');
+            return false;
+          }
+          if (Array.isArray(obj?.eid_sources)) {
+            accSources = obj.eid_sources;
+            accPartial = obj.partial === true;
+            return false;
+          }
+          const piece = obj?.choices?.[0]?.delta?.content;
+          if (typeof piece === 'string' && piece) {
+            // First byte arrived — drop the wait timer/polling and switch to the regular streaming display
+            if (!acc) {
+              stopWaitTracking(waitTok);
+              deepPhase = null;
+            }
+            acc += piece;
+            streamingText = acc;
+          }
+        } catch {
+          // Ignore incomplete / non-JSON data lines
+        }
+        return false;
+      };
+
+      while (true) {
+        const { value, done } = await reader.read();
+        if (done) {
+          // Final flush — process once the decoder's leftover bytes + a last line that ended
+          // without a newline. If the last data:/[DONE] line ends without \n it stays in buf;
+          // this is where the resulting false "response interrupted" alarm is resolved.
+          buf += decoder.decode();
+          for (const rawLine of buf.split('\n')) {
+            if (processLine(rawLine)) {
+              sawDone = true;
+              break;
+            }
+          }
+          break;
+        }
+        buf += decoder.decode(value, { stream: true });
+        const lines = buf.split('\n');
+        buf = lines.pop() ?? '';
+        for (const rawLine of lines) {
+          if (processLine(rawLine)) {
+            sawDone = true;
+            break;
+          }
+        }
+        if (sawDone) {
+          // [DONE] received — release the lock on the remaining stream (a failure here does not affect completion)
+          void reader.cancel().catch(() => {});
+          break;
+        }
+      }
+
+      // Connection dropped without [DONE] — keep what was received + show it
+      if (!sawDone) {
+        notice = acc
+          ? {
+              kind: 'warn',
+              message: '응답이 중단되었습니다 — 받은 부분까지 표시합니다.',
+              retryable: false,
+            }
+          : { kind: 'error', message: '응답을 받지 못했습니다 — 다시 시도하세요.', retryable: true };
+      }
+    } catch (err) {
+      if ((err as Error)?.name === 'AbortError') {
+        // Intentional user interruption (new conversation / cancel wait / switch to deep) — no notice needed
+        return;
+      }
+      // Network error mid-stream — keep what was received + show it
+      notice = acc
+        ? {
+            kind: 'warn',
+            message: '연결이 끊겼습니다 — 받은 부분까지 표시합니다.',
+            retryable: false,
+          }
+        : { kind: 'error', message: '요청에 실패했습니다 — 네트워크를 확인하세요.', retryable: true };
+    } finally {
+      // Stream ended — clear the wait timer/polling. If already cleared at the first byte, or a
+      // switch resend let a new stream take over tracking, the token guard makes this a no-op.
+      stopWaitTracking(waitTok);
+      // On abort (new conversation / leaving the page) do not push — prevents the race where a
+      // previous stream's leftovers flow into messages that a new conversation just emptied.
+      if (acc && !ctrl.signal.aborted) {
+        messages.push({
+          role: 'assistant',
+          content: acc,
+          sources: accSources.length ? accSources : undefined,
+          partial: accPartial || undefined,
+        });
+      }
+      if (abortCtrl === ctrl) {
+        streaming = false;
+        streamingText = '';
+        deepPhase = null;
+        abortCtrl = null;
+      }
+      persist();
+    }
+  }
+
+  function handleKeydown(e: KeyboardEvent) {
+    // Enter sends / Shift+Enter inserts a newline (no send while Korean IME composition is in progress)
+    if (e.key === 'Enter' && !e.shiftKey && !e.isComposing) {
+      e.preventDefault();
+      sendMessage();
+    }
+  }
+
+  // Retry is possible when the last message is a user turn and nothing is streaming
+  let canRetry = $derived(
+    !streaming && messages.length > 0 && messages[messages.length - 1].role === 'user'
+  );
+
+  // Input length (send basis = after trim) — counter shows from 7,500 chars, blocked above 8,000
+  let inputLength = $derived(input.trim().length);
+  let overLimit = $derived(inputLength > MAX_MESSAGE_CHARS);
+
+  // Pre-first-byte placeholder text — an honest distinction between "waiting" and "broken":
+  // busy confirmed = queued / not-busy confirmed = preparing to generate / unknown = awaiting response.
+  // deep mode does not poll, so it is always unknown (timer only) — the wake notice lives in the header caption.
+  let waitPlaceholder = $derived(
+    deepPhase === 'searching'
+      ? `이드가 문서·뉴스에서 근거를 찾는 중 · ${waitSeconds}초`
+      : dailyBusy === true
+        ? `엔진이 다른 작업을 처리하고 있어요 — 차례가 오면 바로 시작됩니다 (대기 ${waitSeconds}초)`
+        : dailyBusy === false
+          ? `응답 생성 준비 중 · ${waitSeconds}초`
+          : `응답 대기 중 · ${waitSeconds}초`
+  );
+
+  // Show action buttons: daily mode + before the first byte + 15 s elapsed
+  let showWaitActions = $derived(
+    streaming && !streamingText && mode === 'daily' && waitSeconds >= WAIT_ACTIONS_SEC
+  );
+</script>
+
+<svelte:head>
+  <title>이드 - PKM</title>
+</svelte:head>
+
+<div class="h-full flex flex-col">
+  <!-- 헤더: 정체성 + 모드 토글 + 새 대화 -->
+  <div class="shrink-0 border-b border-default bg-sidebar px-4 py-2.5">
+    <div class="max-w-3xl mx-auto flex items-center gap-2 flex-wrap">
+      <h1 class="flex items-center gap-2 text-sm font-extrabold tracking-tight shrink-0">
+        <MessageCircle size={16} class="text-accent" />
+        이드
+      </h1>
+
+      <!-- 모드 segmented 토글: 일상 / 심층 -->
+      <div class="flex rounded-md border border-default overflow-hidden" role="group" aria-label="응답 모드">
+        <button
+          type="button"
+          aria-pressed={mode === 'daily'}
+          onclick={() => (mode = 'daily')}
+          disabled={streaming}
+          title="짧은 질문·일상 대화에 적합"
+          class="px-3 py-1.5 text-xs font-semibold transition-colors disabled:opacity-50
+            {mode === 'daily' ? 'bg-accent text-white' : 'bg-surface text-dim hover:text-text hover:bg-surface-hover'}"
+        >
+          일상
+        </button>
+        <button
+          type="button"
+          aria-pressed={mode === 'deep'}
+          onclick={() => (mode = 'deep')}
+          disabled={streaming}
+          title={DEEP_CAPTION}
+          class="px-3 py-1.5 text-xs font-semibold border-l border-default transition-colors disabled:opacity-50
+            {mode === 'deep' ? 'bg-accent text-white' : 'bg-surface text-dim hover:text-text hover:bg-surface-hover'}"
+        >
+          심층
+        </button>
+      </div>
+
+      <div class="flex-1"></div>
+
+      <Button variant="ghost" size="sm" icon={RotateCcw} onclick={newConversation}>
+        새 대화
+      </Button>
+    </div>
+    {#if mode === 'deep'}
+      <div class="max-w-3xl mx-auto mt-1.5">
+        <p class="text-[11px] text-dim">{DEEP_CAPTION}</p>
+      </div>
+    {/if}
+  </div>
+
+  <!-- 메시지 리스트 -->
+  <div bind:this={scrollEl} class="flex-1 min-h-0 overflow-y-auto px-4 py-4">
+    <div class="max-w-3xl mx-auto flex flex-col gap-3" role="log" aria-live="polite">
+      {#if messages.length === 0 && !streaming}
+        <div class="py-10">
+          <EmptyState
+            icon={MessageCircle}
+            title="이드와 대화를 시작하세요"
+            description="일상 질문은 바로, 장문·무거운 질문은 심층 모드로 물어보세요. 아래 프리셋 칩으로 번역·요약·글 다듬기를 빠르게 시작할 수 있습니다."
+          />
+        </div>
+      {/if}
+
+      {#each messages as msg, i (i)}
+        {#if msg.role === 'user'}
+          <div class="flex justify-end">
+            <div class="max-w-[85%] sm:max-w-[75%] px-3.5 py-2.5 rounded-lg rounded-br-sm bg-accent text-white text-sm whitespace-pre-wrap break-words">
+              {msg.content}
+            </div>
+          </div>
+        {:else}
+          <div class="flex flex-col items-start">
+            <div class="max-w-[85%] sm:max-w-[75%] px-3.5 py-2.5 rounded-lg rounded-bl-sm bg-surface border border-default text-text text-sm whitespace-pre-wrap break-words">
+              {msg.content}
+            </div>
+            {#if msg.sources?.length}
+              <EidEvidenceCard sources={msg.sources} partial={msg.partial ?? false} />
+            {/if}
+          </div>
+        {/if}
+      {/each}
+
+      <!-- 스트리밍 중 assistant 부분 응답 / 첫 바이트 전 대기 표시 -->
+      {#if streaming}
+        <div class="flex justify-start">
+          <div class="max-w-[85%] sm:max-w-[75%] px-3.5 py-2.5 rounded-lg rounded-bl-sm bg-surface border border-default text-text text-sm whitespace-pre-wrap break-words">
+            {#if streamingText}
+              {streamingText}<span class="inline-block w-1.5 h-3.5 ml-0.5 align-middle bg-accent animate-pulse rounded-sm"></span>
+            {:else}
+              <span class="text-dim animate-pulse">{waitPlaceholder}</span>
+            {/if}
+          </div>
+        </div>
+        <!-- 대기 행동 버튼: daily + 15초 경과 — 전환은 명시 클릭만 (자동 fallback 금지) -->
+        {#if showWaitActions}
+          <div class="flex justify-start gap-2">
+            <Button variant="secondary" size="sm" onclick={switchToDeep}>심층으로 전환</Button>
+            <Button variant="ghost" size="sm" onclick={cancelWait}>취소</Button>
+          </div>
+        {/if}
+      {/if}
+
+      <!-- 에러/안내 카드: 자동 fallback 없이 명시 표시만 -->
+      {#if notice}
+        <div
+          class="flex items-start gap-2 px-3.5 py-3 rounded-lg border text-sm
+            {notice.kind === 'warn'
+              ? 'border-warning/30 bg-warning/10 text-warning'
+              : 'border-error/30 bg-error/10 text-error'}"
+        >
+          <AlertCircle size={15} class="mt-0.5 shrink-0" />
+          <div class="flex-1 min-w-0">
+            <p>{notice.message}</p>
+            {#if notice.retryable && canRetry}
+              <Button variant="secondary" size="sm" class="mt-2" onclick={retry}>
+                다시 시도
+              </Button>
+            {/if}
+          </div>
+        </div>
+      {/if}
+    </div>
+  </div>
+
+  <!-- 입력 바 (하단 고정 — 모바일에서도 flex 컬럼 하단에 붙음) -->
+  <div class="shrink-0 border-t border-default bg-sidebar px-4 pt-2 pb-3">
+    <div class="max-w-3xl mx-auto">
+      <!-- 프리셋 칩 -->
+      <div class="flex gap-1.5 overflow-x-auto pb-2">
+        {#each PRESETS as preset (preset.label)}
+          <button
+            type="button"
+            onclick={() => applyPreset(preset.prefix)}
+            class="shrink-0 px-2.5 py-1 rounded-full border border-default bg-surface text-xs text-dim hover:text-text hover:border-accent transition-colors"
+          >
+            {preset.label}
+          </button>
+        {/each}
+      </div>
+
+      <div class="flex items-end gap-2">
+        <textarea
+          bind:this={textareaEl}
+          bind:value={input}
+          onkeydown={handleKeydown}
+          rows="1"
+          placeholder="이드에게 메시지 보내기 (Enter 전송, Shift+Enter 줄바꿈)"
+          class="flex-1 min-w-0 px-3 py-2 rounded-lg text-sm bg-bg text-text placeholder:text-faint border border-default focus:border-accent focus:ring-2 focus:ring-accent-ring outline-none resize-none overflow-y-auto transition-colors"
+        ></textarea>
+        <Button
+          variant="primary"
+          size="md"
+          icon={SendHorizontal}
+          loading={streaming}
+          disabled={!input.trim() || overLimit}
+          onclick={sendMessage}
+          aria-label="전송"
+        >
+          <span class="hidden sm:inline">전송</span>
+        </Button>
+      </div>
+
+      <!-- 글자수 카운터: 한도(8,000자) 근접 시에만 노출, 초과 시 인라인 안내 -->
+      {#if inputLength >= COUNTER_THRESHOLD}
+        <p class="mt-1 text-right text-[11px] {overLimit ? 'text-error' : 'text-dim'}" aria-live="polite">
+          {inputLength.toLocaleString()} / {MAX_MESSAGE_CHARS.toLocaleString()}자{overLimit
+            ? ' — 입력이 너무 깁니다 (8,000자 이내)'
+            : ''}
+        </p>
+      {/if}
+    </div>
+  </div>
+</div>
@@ -7,7 +7,7 @@
  import { goto } from '$app/navigation';
  import { api, getAccessToken } from '$lib/api';
  import { isMdSuccess } from '$lib/utils/mdStatus';
-  import { buildAnchorMap } from '$lib/utils/outlineAnchors';
+  import { resolveAnchorMap } from '$lib/utils/resolveAnchorMap';
  import { addToast } from '$lib/stores/toast';
  import { marked } from 'marked';
  import DOMPurify from 'dompurify';
@@ -30,7 +30,6 @@
  import AnalysisPanel from '$lib/components/AnalysisPanel.svelte';
  import ReadCounter from '$lib/components/ReadCounter.svelte';
  import SectionOutline from '$lib/components/SectionOutline.svelte';
-  import Tabs from '$lib/components/ui/Tabs.svelte';

  marked.use({ mangle: false, headerIds: false });
  function renderMd(text) {
@@ -164,11 +163,12 @@
    }
  });

-  // ── 개요 점프 (outlineAnchors, 경로 A) ──
-  // anchorMap = md_content 의 각 절 heading offset. MarkdownDoc 가 <span id="sec-N"> 주입.
+  // ── 개요 점프 (경로 B: BE char_start primary + string-match 폴백) ──
+  // 이 호출 지점(call site)은 항상 md_content basis(canShowMarkdown && doc.md_content) → trustBE=true.
+  // BE char_start 가 있으면 채택, 비면(non-PASS/미백필) resolveAnchorMap 내부에서 buildAnchorMap 로 폴백.
  let anchorMap = $derived(
    hasSections && canShowMarkdown && doc?.md_content
-      ? buildAnchorMap(doc.md_content, sections).anchors
+      ? resolveAnchorMap(doc.md_content, sections, { trustBE: true }).anchors
      : {}
  );
  let activeKey = $state(null);
@@ -411,11 +411,13 @@
                </span>
              </div>
              {#if doc.md_content || doc.extracted_text}
+                <!-- article = 텍스트 네이티브(markdown 변환 비대상). md_status='skipped' 라도
+                     "Markdown 제외" badge 를 띄우지 않도록 mdStatus 미전달(badge 는 mdStatus 로만 구동). -->
                <MarkdownDoc
                  documentId={doc.id}
                  mdContent={doc.md_content}
                  mdFrontmatter={doc.md_frontmatter}
-                  mdStatus={doc.md_status}
+                  mdStatus={null}
                  mdExtractionError={doc.md_extraction_error}
                  mdExtractionQuality={doc.md_extraction_quality}
                  extractedText={doc.extracted_text}
@@ -457,53 +459,68 @@
        {/if}
      </div>

-      <!-- 오른쪽 — 메타 Tabs [정보 | AI | 관리] (카드 11개 수직 스프롤 해소) -->
-      <aside class="min-w-0">
-        <Card>
-          <Tabs
-            tabs={[
-              { id: 'info', label: '정보' },
-              { id: 'ai', label: 'AI' },
-              { id: 'manage', label: '관리' },
-            ]}
-          >
-            {#snippet children(tab)}
-              <div class="pt-3 space-y-4">
-                {#if tab === 'info'}
-                  {#if doc.category === 'library'}
-                    <ReadCounter
-                      documentId={doc.id}
-                      initialCount={doc.read_count ?? 0}
-                      initialLastReadAt={doc.last_read_at ?? null}
-                    />
-                  {/if}
-                  <FileInfoView {doc} />
-                  <ProcessingStatusView {doc} />
-                {:else if tab === 'ai'}
-                  <AnalysisPanel docId={doc.id} doc={doc} />
-                  <AIClassificationEditor {doc} />
-                  <div>
-                    <h4 class="text-xs font-semibold text-dim uppercase mb-1.5">관련 문서</h4>
-                    <!-- TODO(backend): GET /documents/{id}/related?limit=10 (벡터 유사도) -->
-                    <EmptyState
-                      icon={FileText}
-                      title="추후 지원"
-                      description="관련 문서 추천은 backend 연동 후 제공됩니다."
-                    />
-                  </div>
-                {:else}
-                  <LibraryPathEditor {doc} />
-                  <NoteEditor {doc} />
-                  <EditUrlEditor {doc} />
-                  <TagsEditor {doc} />
-                  <div class="pt-2 border-t border-default">
-                    <DocumentDangerZone {doc} ondelete={handleDocDelete} />
-                  </div>
-                {/if}
-              </div>
-            {/snippet}
-          </Tabs>
-        </Card>
+      <!-- 오른쪽 — 슬림 전역 인사이트 레일 (D3: 탭 게이트 제거, 요약·심층·불일치 상시 노출).
+           정보/관리는 접이(<details>) — 데스크탑은 인사이트 상시, 모바일은 본문 메인 + 열어서 확인. -->
+      <aside class="min-w-0 space-y-3">
+        {#if doc.category === 'library'}
+          <Card>
+            <ReadCounter
+              documentId={doc.id}
+              initialCount={doc.read_count ?? 0}
+              initialLastReadAt={doc.last_read_at ?? null}
+            />
+          </Card>
+        {/if}
+
+        <!-- 요약·분석 — 기본 펼침(데스크탑 상시감, 모바일 접기 가능) -->
+        <details open class="bg-surface border border-default rounded-card overflow-hidden group">
+          <summary class="cursor-pointer list-none flex items-center justify-between px-3.5 py-2.5 text-xs font-semibold text-dim uppercase tracking-wide select-none">
+            <span>요약 · 분석</span>
+            <ChevronRight size={14} class="transition-transform group-open:rotate-90 text-faint" />
+          </summary>
+          <div class="px-3.5 pb-3.5 space-y-4">
+            <AnalysisPanel docId={doc.id} doc={doc} />
+            <AIClassificationEditor {doc} />
+            <div>
+              <h4 class="text-xs font-semibold text-dim uppercase mb-1.5">관련 문서</h4>
+              <!-- TODO(backend): GET /documents/{id}/related?limit=10 (벡터 유사도) — v1 제외(자리만) -->
+              <EmptyState
+                icon={FileText}
+                title="추후 지원"
+                description="관련 문서 추천은 backend 연동 후 제공됩니다."
+              />
+            </div>
+          </div>
+        </details>
+
+        <!-- 문서 정보 — 접이(기본 닫힘) -->
+        <details class="bg-surface border border-default rounded-card overflow-hidden group">
+          <summary class="cursor-pointer list-none flex items-center justify-between px-3.5 py-2.5 text-xs font-semibold text-dim uppercase tracking-wide select-none">
+            <span>문서 정보</span>
+            <ChevronRight size={14} class="transition-transform group-open:rotate-90 text-faint" />
+          </summary>
+          <div class="px-3.5 pb-3.5 space-y-3">
+            <FileInfoView {doc} />
+            <ProcessingStatusView {doc} />
+          </div>
+        </details>
+
+        <!-- 관리 — 접이(기본 닫힘) -->
+        <details class="bg-surface border border-default rounded-card overflow-hidden group">
+          <summary class="cursor-pointer list-none flex items-center justify-between px-3.5 py-2.5 text-xs font-semibold text-dim uppercase tracking-wide select-none">
+            <span>관리</span>
+            <ChevronRight size={14} class="transition-transform group-open:rotate-90 text-faint" />
+          </summary>
+          <div class="px-3.5 pb-3.5 space-y-3">
+            <LibraryPathEditor {doc} />
+            <NoteEditor {doc} />
+            <EditUrlEditor {doc} />
+            <TagsEditor {doc} />
+            <div class="pt-2 border-t border-default">
+              <DocumentDangerZone {doc} ondelete={handleDocDelete} />
+            </div>
+          </div>
+        </details>
      </aside>
    </div>

@@ -0,0 +1,15 @@
+-- 318_document_chunks_char_start.sql
+-- 플랜 ds-outline-anchor-b5 (Path B, g1-t1): hier 절 → md_content 본문 점프용 offset 컬럼.
+--
+-- char_start = md_content 내 heading 라인 시작 offset, **UTF-16 code unit** 기준
+--   (FE outlineAnchors.ts:64 `off += raw.length + 1` / MarkdownDoc.svelte:63 `out.slice(off)` 와 동일 단위).
+-- NULL 허용 = (a) md_content 없음(legacy/news/Path A) (b) window-child(node_type='window') (c) preamble(title NULL).
+--   → jump-target(비-window leaf OR %_split parent)만 NOT NULL 을 받는다(BY DESIGN, B1/B3 완료마커 기준).
+--
+-- 두 backfill 경로 공통 prereq:
+--   - UPDATE-only path(g3-tU, hash_stable): 저장된 hier 행에 char_start 만 UPDATE (DELETE/CASCADE/재임베딩 0).
+--   - re-decompose path(g3-t2, hash_changed): persist INSERT 시 char_start 동봉.
+--
+-- 멱등: ADD COLUMN IF NOT EXISTS + init_db version-skip + pg_advisory_xact_lock. BEGIN/COMMIT 금지(단일 statement).
+
+ALTER TABLE document_chunks ADD COLUMN IF NOT EXISTS char_start INTEGER NULL;
@@ -0,0 +1,19 @@
+-- A-3 (plan crawl-24x7-1): 소스 레지스트리 증축 — additive only.
+-- fetch_method     : rss / rss+page / sitemap+page / page / api / signal-only
+-- fulltext_policy  : none(현행 유지) / page(기사 페이지 fetch 후 4-tier 승격) / feed-full(피드 본문이 전문)
+-- auth_profile     : NULL=공개, 값=구독 세션 키 (B-3 Playwright 어댑터용 슬롯)
+-- poll_interval_minutes : 소스별 차등 폴링 (NULL=전역 6h 사이클)
+-- etag / last_modified  : 조건부 GET 워터마크 — 받은 그대로 저장·재전송 (상태는 전부 DB, APScheduler in-process)
+-- feed_content_hash     : CDN ETag 회전 대비 콘텐츠 해시 변경감지 병행
+-- selector_override     : 추출 실패 잦은 소스의 site-specific CSS selector (JSONB)
+-- parser_quirk          : rdf / table-strip / gn-redirect 등 파서 특이 케이스
+ALTER TABLE news_sources
+    ADD COLUMN IF NOT EXISTS fetch_method VARCHAR(20) NOT NULL DEFAULT 'rss',
+    ADD COLUMN IF NOT EXISTS fulltext_policy VARCHAR(20) NOT NULL DEFAULT 'none',
+    ADD COLUMN IF NOT EXISTS auth_profile VARCHAR(50),
+    ADD COLUMN IF NOT EXISTS poll_interval_minutes INTEGER,
+    ADD COLUMN IF NOT EXISTS etag TEXT,
+    ADD COLUMN IF NOT EXISTS last_modified TEXT,
+    ADD COLUMN IF NOT EXISTS feed_content_hash VARCHAR(64),
+    ADD COLUMN IF NOT EXISTS selector_override JSONB,
+    ADD COLUMN IF NOT EXISTS parser_quirk VARCHAR(30);
@@ -0,0 +1,3 @@
+-- 0-5 (a) 확정 (plan crawl-24x7-1): 도메인 자료(안전/공학/철학) 채널 신설 — news 와 분리.
+-- 신규 값은 같은 트랜잭션 내 사용 금지 (PG 제약) — 본 배치의 다른 마이그레이션은 'crawl' 미사용.
+ALTER TYPE source_channel ADD VALUE IF NOT EXISTS 'crawl';
@@ -0,0 +1,3 @@
+-- A-2 (plan crawl-24x7-1): RSS 요약 → 기사 페이지 fetch → 4-tier 본문 승격 stage.
+-- fulltext_policy='page' 소스의 기사에만 news_collector 가 enqueue.
+ALTER TYPE process_stage ADD VALUE IF NOT EXISTS 'fulltext';
@@ -0,0 +1,19 @@
+-- A-5 (plan crawl-24x7-1): 소스 건강 — 소스별 실패 격리 기록 + circuit breaker.
+-- 한 소스가 죽어도 나머지 영향 0. silent skip 누적 방지의 가시성 기반 (A-8 패널이 읽음).
+-- circuit_state: closed(정상) / open(연속 실패로 지수 backoff 중) / disabled(M회 초과, 수동 복구 대상)
+-- empty_streak : 200 인데 entries 0 인 연속 fetch 횟수 (피드 부패 감시 — 304/해시동일은 미집계)
+CREATE TABLE IF NOT EXISTS source_health (
+    id SERIAL PRIMARY KEY,
+    source_id INTEGER NOT NULL REFERENCES news_sources(id) ON DELETE CASCADE,
+    consecutive_failures INTEGER NOT NULL DEFAULT 0,
+    total_fetches BIGINT NOT NULL DEFAULT 0,
+    total_failures BIGINT NOT NULL DEFAULT 0,
+    last_success_at TIMESTAMPTZ,
+    last_error TEXT,
+    last_error_at TIMESTAMPTZ,
+    last_fetch_items INTEGER,
+    empty_streak INTEGER NOT NULL DEFAULT 0,
+    circuit_state VARCHAR(10) NOT NULL DEFAULT 'closed',
+    circuit_opened_at TIMESTAMPTZ,
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
@@ -0,0 +1,2 @@
+-- A-5: source_health 는 news_sources 와 1:1 — upsert 기준 키.
+CREATE UNIQUE INDEX IF NOT EXISTS uq_source_health_source_id ON source_health (source_id);
@@ -0,0 +1,5 @@
+-- B/C 그룹 (plan crawl-24x7-1, 0-5 확정): 레지스트리에 채널 컬럼 — additive only.
+-- documents.source_channel 과 동일 enum 재사용 ('crawl' 값은 320 에서 별도 트랜잭션으로 추가 완료).
+-- 기존 행 전부 'news' 기본값 = 무회귀. crawl 채널 소스의 문서 생성/색인 게이트 분기 기준.
+ALTER TABLE news_sources
+    ADD COLUMN IF NOT EXISTS source_channel source_channel NOT NULL DEFAULT 'news';
@@ -0,0 +1,8 @@
+-- B-3 (plan crawl-24x7-1): 구독 세션 상태 노출 계약 — additive only.
+-- relogin_requested: 쓰기 1종 플래그 (A-8 버튼이 기록, 어댑터가 소비 = 수동 half-open).
+--   소비 위치 함정(r5 고정): open-스킵 분기보다 앞 — 어댑터 틱마다 확인.
+-- last_probe_at/ok: 내용 기반 probe 결과 (시간 기반 만료 판정 금지 — silent corruption 차단).
+ALTER TABLE source_health
+    ADD COLUMN IF NOT EXISTS relogin_requested BOOLEAN NOT NULL DEFAULT FALSE,
+    ADD COLUMN IF NOT EXISTS last_probe_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS last_probe_ok BOOLEAN;
@@ -0,0 +1,33 @@
+-- crawl-24x7 사이클 2 소스 seed (B-2 + C-1 안전 + C-5 철학) — 2026-06-10 전 URL live 검증.
+-- 262 선례: WHERE NOT EXISTS idempotent, 기존 행 보존, 신규만 insert (단일 statement).
+-- 채널: news = 다이제스트/브리핑 대상 / crawl = 도메인 재료 (0-5 분리).
+-- 정책: feed-full = 피드 본문이 전문 (UK HSE content:encoded 실측) / page = 기사 페이지 4-tier 승격.
+-- EU-OSHA 는 후보 등재만 (enabled=false — 카드 C-1 '우선순위 낮음').
+-- 르몽드 B-3 활성화는 seed 아님 — 세션 박제 후 runtime UPDATE (auth_profile/selector_override).
+INSERT INTO news_sources
+    (name, country, language, feed_type, feed_url, category, enabled,
+     fetch_method, fulltext_policy, source_channel, parser_quirk)
+SELECT v.name, v.country, v.language, v.feed_type, v.feed_url, v.category, v.enabled,
+       v.fetch_method, v.fulltext_policy, v.source_channel::source_channel, v.parser_quirk
+FROM (VALUES
+    -- B-2: Guardian Open Platform (전문 JSON — 스크래핑 불요, GUARDIAN_API_KEY 필요)
+    ('Guardian World',          'GB', 'en', 'api', 'https://content.guardianapis.com/search?section=world',      'International', true,  'api',  'none',      'news',  NULL),
+    -- C-1 안전 (Safety)
+    ('UK HSE Press',            'GB', 'en', 'rss', 'https://press.hse.gov.uk/feed/',                             'Safety',        true,  'rss',  'feed-full', 'crawl', NULL),
+    ('안전신문',                 'KR', 'ko', 'rss', 'https://www.safetynews.co.kr/rss/allArticle.xml',            'Safety',        true,  'rss',  'page',      'crawl', NULL),
+    ('고용노동부 공지',          'KR', 'ko', 'rss', 'https://www.moel.go.kr/rss/notice.do',                       'Safety',        true,  'rss',  'page',      'crawl', NULL),
+    ('고용노동부 정책',          'KR', 'ko', 'rss', 'https://www.moel.go.kr/rss/policy.do',                       'Safety',        true,  'rss',  'page',      'crawl', NULL),
+    ('고용노동부 입법행정예고',   'KR', 'ko', 'rss', 'https://www.moel.go.kr/rss/lawinfo.do',                      'Safety',        true,  'rss',  'page',      'crawl', NULL),
+    ('OSHA QuickTakes',         'US', 'en', 'rss', 'https://www.osha.gov/sites/default/files/quicktakes.xml',    'Safety',        true,  'rss',  'page',      'crawl', NULL),
+    ('EU-OSHA News',            'EU', 'en', 'rss', 'https://osha.europa.eu/en/rss-feeds/latest/news.xml',        'Safety',        false, 'rss',  'page',      'crawl', NULL),
+    -- C-5 철학 (Philosophy)
+    ('SEP 신규·개정',            'US', 'en', 'rss', 'https://plato.stanford.edu/rss/sep.xml',                     'Philosophy',    true,  'rss',  'page',      'crawl', NULL),
+    ('1000-Word Philosophy',    'US', 'en', 'rss', 'https://1000wordphilosophy.com/feed/',                       'Philosophy',    true,  'rss',  'feed-full', 'crawl', NULL),
+    ('Doing Philosophy',        'KR', 'ko', 'rss', 'https://doingphilosophy.kr/feed',                            'Philosophy',    true,  'rss',  'page',      'crawl', NULL),
+    ('Aeon',                    'GB', 'en', 'rss', 'https://aeon.co/feed.rss',                                   'Philosophy',    true,  'rss',  'page',      'crawl', 'skip-video'),
+    ('Psyche',                  'GB', 'en', 'rss', 'https://psyche.co/feed.rss',                                 'Philosophy',    true,  'rss',  'page',      'crawl', 'skip-video')
+) AS v(name, country, language, feed_type, feed_url, category, enabled,
+       fetch_method, fulltext_policy, source_channel, parser_quirk)
+WHERE NOT EXISTS (
+    SELECT 1 FROM news_sources ns WHERE ns.name = v.name
+);
@@ -0,0 +1,32 @@
+-- crawl-24x7 사이클 3 소스 seed (B-4 시그널 + C-4 공학 지속수집) — 2026-06-11 전 URL live 검증.
+-- 326 선례: WHERE NOT EXISTS idempotent, 기존 행 보존, 신규만 insert (단일 statement).
+-- fetch_method='signal-only' (B-4): 헤드라인+요약만 인제스트, 페이지 fetch 0,
+--   summarize 스킵(검색 색인만 — embed/chunk). 다이제스트는 ai_summary NULL 제외라 자연 배제.
+-- Bloomberg = anti-bot 최강이라 본문 수집 비권고 → 시그널 전용. 피드에 비디오 혼재 실측 → skip-video.
+-- Economist = 실측 200 (Archiver UA 는 feed-reader 로 취급됨 — 브라우저 UA 만 403). 구독 없음 = 시그널.
+-- Nikkei Asia = RSS 1.0(RDF) 실측 — feedparser 가 네이티브 정규화 (title/link 만, 요약·날짜 없음
+--   = 제목 시그널). 코드 분기 불요 (tests/test_crawl_cycle3_shapes.py fixture 회귀로 박제).
+-- arXiv/ASME = 초록이 곧 본문 (C-4 2단: 초록 색인 먼저, 선별 전문은 Phase 3) → signal-only 재사용.
+-- IEEE Spectrum = 피드 description 이 전문 (7.9~14K자 실측) → feed-full. 카테고리 필터 = topic 피드.
+INSERT INTO news_sources
+    (name, country, language, feed_type, feed_url, category, enabled,
+     fetch_method, fulltext_policy, source_channel, parser_quirk)
+SELECT v.name, v.country, v.language, v.feed_type, v.feed_url, v.category, v.enabled,
+       v.fetch_method, v.fulltext_policy, v.source_channel::source_channel, v.parser_quirk
+FROM (VALUES
+    -- B-4: 시그널 전용 (news 채널 — 헤드라인 시그널)
+    ('Bloomberg Markets',        'US', 'en', 'rss', 'https://feeds.bloomberg.com/markets/news.rss',                                              'Economy',       true, 'signal-only', 'none',      'news',  'skip-video'),
+    ('Bloomberg Technology',     'US', 'en', 'rss', 'https://feeds.bloomberg.com/technology/news.rss',                                           'Technology',    true, 'signal-only', 'none',      'news',  'skip-video'),
+    ('Economist Latest',         'GB', 'en', 'rss', 'https://www.economist.com/latest/rss.xml',                                                  'International', true, 'signal-only', 'none',      'news',  NULL),
+    ('Nikkei Asia',              'JP', 'en', 'rss', 'https://asia.nikkei.com/rss/feed/nar',                                                      'International', true, 'signal-only', 'none',      'news',  NULL),
+    -- C-4: 공학 지속수집 (crawl 채널 — 도메인 재료. API 공지/CSB/CCPS 는 전용 워커가 runtime 등록)
+    ('ASME J. Pressure Vessel Technology', 'US', 'en', 'rss', 'https://asmedigitalcollection.asme.org/rss/site_1000037/LatestOpenIssueArticles_1000020.xml', 'Engineering', true, 'signal-only', 'none', 'crawl', NULL),
+    ('arXiv cond-mat.mtrl-sci',  'US', 'en', 'rss', 'https://rss.arxiv.org/rss/cond-mat.mtrl-sci',                                               'Engineering',   true, 'signal-only', 'none',      'crawl', NULL),
+    ('arXiv physics.app-ph',     'US', 'en', 'rss', 'https://rss.arxiv.org/rss/physics.app-ph',                                                  'Engineering',   true, 'signal-only', 'none',      'crawl', NULL),
+    ('IEEE Spectrum Energy',     'US', 'en', 'rss', 'https://spectrum.ieee.org/feeds/topic/energy.rss',                                          'Engineering',   true, 'rss',         'feed-full', 'crawl', NULL),
+    ('IEEE Spectrum Robotics',   'US', 'en', 'rss', 'https://spectrum.ieee.org/feeds/topic/robotics.rss',                                        'Engineering',   true, 'rss',         'feed-full', 'crawl', NULL)
+) AS v(name, country, language, feed_type, feed_url, category, enabled,
+       fetch_method, fulltext_policy, source_channel, parser_quirk)
+WHERE NOT EXISTS (
+    SELECT 1 FROM news_sources ns WHERE ns.name = v.name
+);
@@ -0,0 +1,8 @@
+-- Phase 2A (embedding-phase2a-1 E-1): 후보 임베딩 docs 섀도 테이블 (eval 전용, 단일 statement).
+-- 평가 = exact scan 이라 벡터 인덱스 없음 (인덱스 전략 = C-1 컷오버 소관).
+CREATE TABLE IF NOT EXISTS documents_cand_qwen06 (
+    doc_id BIGINT PRIMARY KEY,
+    embed_input_hash TEXT,
+    embedding vector(1024) NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
@@ -0,0 +1,10 @@
+-- Phase 2A (embedding-phase2a-1 E-1): 후보 임베딩 chunks 섀도 테이블 (eval 전용, 단일 statement).
+CREATE TABLE IF NOT EXISTS document_chunks_cand_qwen06 (
+    id BIGINT PRIMARY KEY,
+    doc_id BIGINT NOT NULL,
+    chunk_index INTEGER,
+    section_title TEXT,
+    text TEXT,
+    embedding vector(1024) NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
@@ -0,0 +1,8 @@
+-- Phase 2A (embedding-phase2a-1 E-1): 후보 임베딩 docs 섀도 테이블 (eval 전용, 단일 statement).
+-- 평가 = exact scan 이라 벡터 인덱스 없음 (인덱스 전략 = C-1 컷오버 소관).
+CREATE TABLE IF NOT EXISTS documents_cand_qwen4 (
+    doc_id BIGINT PRIMARY KEY,
+    embed_input_hash TEXT,
+    embedding vector(2560) NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
@@ -0,0 +1,10 @@
+-- Phase 2A (embedding-phase2a-1 E-1): 후보 임베딩 chunks 섀도 테이블 (eval 전용, 단일 statement).
+CREATE TABLE IF NOT EXISTS document_chunks_cand_qwen4 (
+    id BIGINT PRIMARY KEY,
+    doc_id BIGINT NOT NULL,
+    chunk_index INTEGER,
+    section_title TEXT,
+    text TEXT,
+    embedding vector(2560) NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
--- a/Show More
+++ b/Show More