diff --git a/app/core/config.py b/app/core/config.py
index 5cf3302..fdb1521 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -169,6 +169,14 @@ class Settings(BaseModel):
     # 1 = 구 single-inference 동작. 2 = continuous batching 활용 (llm_gate docstring 참조).
     mlx_gate_concurrency: int = 1
 
+    # digest/briefing 생성 LLM 호출 파라미터 (2026-06-15, 모델 교체 후 타임아웃 단일소스화).
+    # 구 하드코딩 25s(빠른 Gemma 기준)가 Qwen3.6-27B-6bit(콜당 ~90~300s) 교체 sweep 에서
+    # 누락돼 digest 600s 하드캡 초과·briefing 4/4 폴백을 유발 → config 단일소스로 이관.
+    # 동시성은 별 키 아님 — 전역 mlx_gate_concurrency(게이트 단일 budget)가 담당.
+    digest_llm_timeout_s: int = 200
+    digest_llm_attempts: int = 2
+    digest_pipeline_hard_cap_s: int = 1800
+
     # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
     # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거.
     study_explanation_enabled: bool = True
@@ -257,6 +265,9 @@ def load_settings() -> Settings:
 
     pipeline_held_stages: list[str] = []
     mlx_gate_concurrency = 1
+    digest_llm_timeout_s = 200
+    digest_llm_attempts = 2
+    digest_pipeline_hard_cap_s = 1800
     if config_path.exists() and raw and "pipeline" in raw:
         held_raw = (raw.get("pipeline") or {}).get("held_stages") or []
         # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용.
@@ -269,6 +280,19 @@ def load_settings() -> Settings:
             )
         except (TypeError, ValueError):
             mlx_gate_concurrency = 1
+        _pl = raw.get("pipeline") or {}
+        try:
+            digest_llm_timeout_s = max(1, int(_pl.get("digest_llm_timeout_s", 200)))
+        except (TypeError, ValueError):
+            digest_llm_timeout_s = 200
+        try:
+            digest_llm_attempts = max(1, int(_pl.get("digest_llm_attempts", 2)))
+        except (TypeError, ValueError):
+            digest_llm_attempts = 2
+        try:
+            digest_pipeline_hard_cap_s = max(60, int(_pl.get("digest_pipeline_hard_cap_s", 1800)))
+        except (TypeError, ValueError):
+            digest_pipeline_hard_cap_s = 1800
 
     taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
     document_types = raw.get("document_types", []) if config_path.exists() and raw else []
@@ -300,6 +324,9 @@ def load_settings() -> Settings:
         internal_worker_token=internal_worker_token,
         pipeline_held_stages=pipeline_held_stages,
         mlx_gate_concurrency=mlx_gate_concurrency,
+        digest_llm_timeout_s=digest_llm_timeout_s,
+        digest_llm_attempts=digest_llm_attempts,
+        digest_pipeline_hard_cap_s=digest_pipeline_hard_cap_s,
     )
 
 
diff --git a/app/main.py b/app/main.py
index edfa6f7..f4ee993 100644
--- a/app/main.py
+++ b/app/main.py
@@ -64,7 +64,7 @@ async def lifespan(app: FastAPI):
     from workers.csb_collector import run as csb_collector_run
     from workers.api_standards_collector import run as api_standards_run
     from workers.ccps_collector import run as ccps_collector_run
-    from workers.queue_consumer import consume_queue, consume_fast_queue, consume_markdown_queue
+    from workers.queue_consumer import consume_queue, consume_fast_queue, consume_markdown_queue, consume_deep_queue
     from workers.study_queue_consumer import consume_study_queue
     from workers.study_session_queue_consumer import consume_study_session_queue
     from workers.study_memo_card_jobs_consumer import consume_study_memo_card_queue
@@ -101,6 +101,8 @@ async def lifespan(app: FastAPI):
     # 2026-06-12 fast-consumer split: embed/chunk(건당 <1s)를 LLM 사이클에서 분리 —
     # classify(~190s×3)가 사이클을 점유해 벡터 적재가 굶던 구조 캡 해소 (markdown 선례).
     scheduler.add_job(consume_fast_queue, "interval", minutes=1, id="fast_queue_consumer")
+    # 2026-06-15 deep-consumer split: deep_summary(70~300s)를 메인 루프에서 분리 (markdown/fast 선례).
+    scheduler.add_job(consume_deep_queue, "interval", minutes=1, id="deep_queue_consumer")
     scheduler.add_job(watch_inbox, "interval", minutes=5, id="file_watcher")
     scheduler.add_job(cleanup_orphan_uploads, "interval", minutes=10, id="upload_cleanup")
     # PR-4: study_questions 자동 임베딩 (status='none/failed/stale' 행을 batch=10 처리).
diff --git a/app/services/briefing/comparator.py b/app/services/briefing/comparator.py
index 973826a..c0252d5 100644
--- a/app/services/briefing/comparator.py
+++ b/app/services/briefing/comparator.py
@@ -18,12 +18,14 @@ from typing import Any
 import numpy as np
 
 from ai.client import parse_json_response
+from core.config import settings
 from core.utils import setup_logger
 from services.clustering_common import normalize_vector
+from services.search.llm_gate import Priority, acquire_mlx_gate
 
 logger = setup_logger("briefing_comparator")
 
-LLM_CALL_TIMEOUT = 25                 # 초. Phase 4 와 동일
+LLM_CALL_TIMEOUT = settings.digest_llm_timeout_s   # 2026-06-15 config 단일소스 (Phase 4 와 동일 키)
 HISTORICAL_TOP_K = 5
 HISTORICAL_SIMILARITY_MIN = 0.70
 HISTORICAL_WINDOW_DAYS = 30
@@ -39,7 +41,6 @@ MAX_ARTICLE_IDS_PER_COUNTRY = 5       # country_perspectives[].article_ids 후
 FALLBACK_HEADLINE = "LLM 분석 실패로 원문 기사 묶음만 표시합니다."
 FALLBACK_TOPIC_LABEL = "주요 뉴스 묶음"
 
-_llm_sem = asyncio.Semaphore(1)
 _PROMPT_PATH = Path(__file__).resolve().parent.parent.parent / "prompts" / "briefing_comparative.txt"
 _PROMPT_TEMPLATE: str | None = None
 
@@ -112,7 +113,8 @@ def retrieve_historical(
 
 
 async def _try_call_llm(client: Any, prompt: str) -> str:
-    async with _llm_sem:
+    # 전역 MLX gate(BACKGROUND) 경유 — 영구 룰(llm_gate): 새 Semaphore 금지, timeout 은 gate 안쪽.
+    async with acquire_mlx_gate(Priority.BACKGROUND):
         return await asyncio.wait_for(
             client.call_primary(prompt),
             timeout=LLM_CALL_TIMEOUT,
@@ -282,7 +284,7 @@ async def compare_cluster_with_fallback(
     historical_docs = historical_docs or []
     prompt = build_prompt(selected, historical_docs)
 
-    for attempt in range(2):
+    for attempt in range(settings.digest_llm_attempts):  # 2026-06-15 config 단일소스
         try:
             raw = await _try_call_llm(client, prompt)
         except asyncio.TimeoutError:
diff --git a/app/services/briefing/pipeline.py b/app/services/briefing/pipeline.py
index 715b578..3d7df8d 100644
--- a/app/services/briefing/pipeline.py
+++ b/app/services/briefing/pipeline.py
@@ -6,6 +6,7 @@
 regenerate 정책: briefing_date UNIQUE 충돌 시 transaction 안에서 DELETE+INSERT.
 """
 
+import asyncio
 import time
 from datetime import date, datetime, timedelta, timezone
 from typing import Any
@@ -33,7 +34,6 @@ KST = ZoneInfo("Asia/Seoul")
 NIGHT_WINDOW_HOURS = 5             # KST 00:00 ~ 05:00
 SELECT_K = 7                       # Plan §"Clustering 파라미터" briefing K_PER_CLUSTER=7
 SELECT_LAMBDA_MMR = 0.6            # Plan briefing MMR lambda 0.6
-PIPELINE_HARD_CAP = 600            # 초. Phase 4 와 동일
 
 
 def _compute_window(target_date: date | None = None) -> tuple[datetime, datetime, date]:
@@ -206,16 +206,28 @@ async def run_briefing_pipeline(target_date: date | None = None) -> dict[str, An
     usable_count = 0
 
     try:
+        # 2026-06-15: cluster 호출 gather 동시 실행. 실동시성 = 전역 MLX gate
+        # (config.mlx_gate_concurrency, BACKGROUND 우선순위). rank/순서 보존.
+        jobs = []
         for rank, cluster in enumerate(clusters, start=1):
             selected = select_for_llm(cluster, k=SELECT_K, lambda_mmr=SELECT_LAMBDA_MMR)
             historical_docs = (
                 retrieve_historical(cluster, historical_candidates)
                 if historical_enabled() else []
             )
-            llm_calls += 1
-            envelope = await compare_cluster_with_fallback(
+            jobs.append((rank, cluster, selected, historical_docs))
+
+        async def _run_one(cluster, selected, historical_docs):
+            return await compare_cluster_with_fallback(
                 client, cluster, selected, historical_docs=historical_docs
             )
+
+        results = await asyncio.gather(
+            *[_run_one(c, s, h) for (_, c, s, h) in jobs]
+        )
+
+        for (rank, cluster, selected, historical_docs), envelope in zip(jobs, results):
+            llm_calls += 1
             if envelope.get("llm_fallback_used"):
                 llm_failures += 1
             if _is_usable_topic(envelope, envelope["topic_label"]):
diff --git a/app/services/digest/pipeline.py b/app/services/digest/pipeline.py
index e4f45ab..42c9266 100644
--- a/app/services/digest/pipeline.py
+++ b/app/services/digest/pipeline.py
@@ -10,6 +10,7 @@ Step:
     7. start/end 로그 + generation_ms + fallback 비율 health metric
 """
 
+import asyncio
 import hashlib
 import time
 from datetime import datetime, timedelta, timezone
@@ -107,20 +108,29 @@ async def run_digest_pipeline() -> dict:
     stats = {"llm_calls": 0, "fallback_used": 0}
 
     try:
+        # 2026-06-15: cluster 호출을 gather 로 동시 실행. 실제 동시성은 전역 MLX gate
+        # (config.mlx_gate_concurrency, BACKGROUND 우선순위) 가 제한한다. rank/순서 보존.
+        jobs = []
         for country, docs in docs_by_country.items():
             clusters = cluster_country(country, docs)
             if not clusters:
                 continue  # sparse country 자동 제외
-
             for rank, cluster in enumerate(clusters, start=1):
                 selected = select_for_llm(cluster)
-                stats["llm_calls"] += 1
-                llm_result = await summarize_cluster_with_fallback(client, cluster, selected)
-                if llm_result["llm_fallback_used"]:
-                    stats["fallback_used"] += 1
-                all_topic_rows.append(
-                    _build_topic_row(country, rank, cluster, selected, llm_result, primary_model)
-                )
+                jobs.append((country, rank, cluster, selected))
+
+        async def _run_one(cluster, selected):
+            return await summarize_cluster_with_fallback(client, cluster, selected)
+
+        results = await asyncio.gather(*[_run_one(c, s) for (_, _, c, s) in jobs])
+
+        for (country, rank, cluster, selected), llm_result in zip(jobs, results):
+            stats["llm_calls"] += 1
+            if llm_result["llm_fallback_used"]:
+                stats["fallback_used"] += 1
+            all_topic_rows.append(
+                _build_topic_row(country, rank, cluster, selected, llm_result, primary_model)
+            )
     finally:
         await client.close()
 
diff --git a/app/services/digest/summarizer.py b/app/services/digest/summarizer.py
index 173f58c..b0095a7 100644
--- a/app/services/digest/summarizer.py
+++ b/app/services/digest/summarizer.py
@@ -2,8 +2,8 @@
 
 핵심 결정:
 - AIClient._call_chat 직접 호출 (client.py 수정 회피, fallback 로직 재사용)
-- Semaphore(1) 로 MLX 과부하 회피
-- Per-call timeout 25초 (asyncio.wait_for) — MLX hang / fallback Claude API stall 방어
+- 전역 MLX gate(BACKGROUND) 경유로 동시성 제어 (services.search.llm_gate 단일 게이트)
+- Per-call timeout = config.digest_llm_timeout_s (asyncio.wait_for, gate 안쪽)
 - JSON 파싱 실패 → 1회 재시도 → 그래도 실패 시 minimal fallback (drop 금지)
 - fallback: topic_label="주요 뉴스 묶음", summary = top member ai_summary[:200]
 """
@@ -13,15 +13,16 @@ from pathlib import Path
 from typing import Any
 
 from ai.client import parse_json_response
+from core.config import settings
 from core.utils import setup_logger
+from services.search.llm_gate import Priority, acquire_mlx_gate
 
 logger = setup_logger("digest_summarizer")
 
-LLM_CALL_TIMEOUT = 25       # 초. MLX 평균 5초 + tail latency 마진
+# 2026-06-15: config 단일소스 (구 하드코딩 25s = 빠른 Gemma 기준, Qwen 27B 교체 후 누락).
+LLM_CALL_TIMEOUT = settings.digest_llm_timeout_s
 FALLBACK_SUMMARY_LIMIT = 200
 
-_llm_sem = asyncio.Semaphore(1)
-
 _PROMPT_PATH = Path(__file__).resolve().parent.parent.parent / "prompts" / "digest_topic.txt"
 _PROMPT_TEMPLATE: str | None = None
 
@@ -48,8 +49,12 @@ def build_prompt(selected: list[dict]) -> str:
 
 
 async def _try_call_llm(client: Any, prompt: str) -> str:
-    """Semaphore + per-call timeout 으로 감싼 단일 호출."""
-    async with _llm_sem:
+    """전역 MLX gate(BACKGROUND) + per-call timeout 으로 감싼 단일 호출.
+
+    영구 룰(llm_gate): Mac mini endpoint 는 단일 게이트 공유, 새 Semaphore 금지.
+    동시성 lever = config.mlx_gate_concurrency. timeout 은 gate 안쪽에서만.
+    """
+    async with acquire_mlx_gate(Priority.BACKGROUND):
         return await asyncio.wait_for(
             client._call_chat(client.ai.primary, prompt),
             timeout=LLM_CALL_TIMEOUT,
@@ -86,7 +91,7 @@ async def summarize_cluster_with_fallback(
     """
     prompt = build_prompt(selected)
 
-    for attempt in range(2):  # 1회 재시도 포함
+    for attempt in range(settings.digest_llm_attempts):  # config 단일소스 (기본 2 = 1회 재시도)
         try:
             raw = await _try_call_llm(client, prompt)
         except asyncio.TimeoutError:
diff --git a/app/workers/briefing_worker.py b/app/workers/briefing_worker.py
index 679f26f..5ee9fcf 100644
--- a/app/workers/briefing_worker.py
+++ b/app/workers/briefing_worker.py
@@ -14,7 +14,8 @@ from services.briefing.pipeline import run_briefing_pipeline
 
 logger = setup_logger("briefing_worker")
 
-PIPELINE_HARD_CAP = 600
+# 2026-06-15: config 단일소스 (digest 와 공유 키). 구 600s = 빠른 Gemma 기준.
+PIPELINE_HARD_CAP = settings.digest_pipeline_hard_cap_s
 
 
 async def run(target_date: date | None = None) -> dict | None:
diff --git a/app/workers/digest_worker.py b/app/workers/digest_worker.py
index 477a954..dc81490 100644
--- a/app/workers/digest_worker.py
+++ b/app/workers/digest_worker.py
@@ -16,7 +16,8 @@ from services.digest.pipeline import run_digest_pipeline
 
 logger = setup_logger("digest_worker")
 
-PIPELINE_HARD_CAP = 600  # 10분 hard cap
+# 2026-06-15: config 단일소스 (구 600s = 빠른 Gemma 기준, Qwen 27B 교체 후 누락 → 초과).
+PIPELINE_HARD_CAP = settings.digest_pipeline_hard_cap_s
 
 
 async def run() -> None:
diff --git a/app/workers/queue_consumer.py b/app/workers/queue_consumer.py
index 94cbe57..4903ff1 100644
--- a/app/workers/queue_consumer.py
+++ b/app/workers/queue_consumer.py
@@ -47,10 +47,15 @@ MARKDOWN_STALE_THRESHOLD_MINUTES = int(os.getenv("MARKDOWN_STALE_MINUTES", "120"
 # STT 도 장기 작업 가능성이 있으나 본 PR 범위 밖 — main 에 유지(follow-up).
 MAIN_QUEUE_STAGES = [
     "extract", "classify", "summarize",
-    "preview", "stt", "thumbnail", "deep_summary", "fulltext",
+    "preview", "stt", "thumbnail", "fulltext",
 ]
 MARKDOWN_QUEUE_STAGES = ["markdown"]
 
+# 2026-06-15: deep_summary(26B, 콜당 70~300s)를 메인 루프에서 분리 (markdown/fast 선례).
+# 단일 deep 호출이 1분 틱을 초과해 메인 consume_queue 가 영구 coalesce 되고 extract/
+# classify 등 경량 stage 까지 굶던 문제 제거. 집합 disjoint(자기 집합만 stale reset).
+DEEP_QUEUE_STAGES = ["deep_summary"]
+
 # 고속(비-LLM·경량 GPU) stage — LLM 사이클(분 단위)에서 분리해 1분 잡 전용 소비.
 # embed/chunk 는 건당 <1s 라 main 루프에 두면 classify(~190s×3) 뒤에서 굶는다
 # (2026-06-12 실측: 적체 3,570 · 4070 가동률 0%). markdown 분리(05-01)와 동일 패턴.
@@ -405,3 +410,24 @@ async def consume_markdown_queue():
 
     for stage in MARKDOWN_QUEUE_STAGES:
         await _process_stage(stage, workers[stage])
+
+
+async def consume_deep_queue():
+    """deep_summary 전용 큐 소비자 (2026-06-15) — 26B 심층요약을 메인 파이프라인과 분리.
+
+    deep_summary 1콜이 70~300s(맥미니 Qwen 27B 폴백)라 메인 consume_queue(1분 틱) 안에
+    있으면 매 틱이 interval 을 초과해 영구 "maximum running instances" coalesce 되고
+    extract/classify 등 경량 stage 까지 함께 굶었다. 분리 후 = deep 만 자기 1분 잡에서
+    coalesce, 나머지 메인 루프는 틱 내 완료. max_instances=1 로 동시 deep 2건은 방지.
+    """
+    workers = _load_workers()
+
+    try:
+        await reset_stale_items(DEEP_QUEUE_STAGES, STALE_THRESHOLD_MINUTES)
+    except Exception:
+        logger.exception("deep stale reset failed, but continuing queue consumption")
+
+    for stage in DEEP_QUEUE_STAGES:
+        if stage in settings.pipeline_held_stages:
+            continue
+        await _process_stage(stage, workers[stage])
diff --git a/config.yaml b/config.yaml
index 0d9afa3..416a7ea 100644
--- a/config.yaml
+++ b/config.yaml
@@ -199,8 +199,14 @@ schedule:
 # 이력: 2026-06-11 맥미니 모델 확정까지 8키 홀드 → 同日 Qwen3.6-27B-6bit 전환과 함께 해제([]).
 pipeline:
   held_stages: []
-  # mlx gate 동시 실행 상한 (2026-06-12 fair-share): 구 "1 고정" 룰의 전제(single-inference
-  # 서버)가 소멸 — 현 mlx_vlm 은 continuous batching (2026-06-11 밤 6~8 concurrent 실측 정상).
-  # 2 = 워커 LLM 호출과 인터랙티브(ask/eid)가 서로 안 막힘 + 집계 throughput ~1.8배.
-  # 게이트(상한+우선순위)는 유지 — thundering herd 방지. 1 로 되돌리면 구 동작.
-  mlx_gate_concurrency: 2
+  # mlx gate 동시 실행 상한 (config.mlx_gate_concurrency). 현 mlx_vlm = continuous batching
+  # (2026-06-11 밤 6~8 concurrent 실측 정상). 2026-06-15: 2→4 — digest/briefing 합성을
+  # 이 단일 게이트(BACKGROUND 우선순위)로 라우팅하며 digest(클러스터 44~68)가 하드캡 내
+  # 완료되도록 동시성 확보. ask/eid(FOREGROUND)는 큐 점프라 영향 최소. 되돌리면 구 동작.
+  mlx_gate_concurrency: 4
+  # 2026-06-15: digest/briefing 생성 LLM 파라미터 (모델 교체 후 단일소스, 상세 = config.py).
+  # 구 하드코딩 25s(빠른 Gemma)가 Qwen 27B(콜당 ~90~300s) 교체 sweep 누락 → digest 600s
+  # 초과·briefing 4/4 폴백. 동시성은 위 mlx_gate_concurrency 가 담당(별 키 없음).
+  digest_llm_timeout_s: 300
+  digest_llm_attempts: 2
+  digest_pipeline_hard_cap_s: 3000
diff --git a/tests/test_pipeline_hold.py b/tests/test_pipeline_hold.py
index b6613db..5fcefb3 100644
--- a/tests/test_pipeline_hold.py
+++ b/tests/test_pipeline_hold.py
@@ -26,7 +26,8 @@ def _fake_consumer_env(monkeypatch, held):
         lambda: {
             s: object()
             for s in (queue_consumer.MAIN_QUEUE_STAGES
-                      + queue_consumer.FAST_QUEUE_STAGES + ["markdown"])
+                      + queue_consumer.FAST_QUEUE_STAGES
+                      + queue_consumer.DEEP_QUEUE_STAGES + ["markdown"])
         },
     )
     monkeypatch.setattr(queue_consumer, "_hold_logged", False)
@@ -83,13 +84,37 @@ async def test_fast_consumer_respects_hold(monkeypatch):
     assert processed == ["chunk"]
 
 
+@pytest.mark.asyncio
+async def test_deep_consumer_processes_deep_only(monkeypatch):
+    """deep 컨슈머(2026-06-15 분리) = deep_summary 전용 (메인 루프와 디커플)."""
+    processed = _fake_consumer_env(monkeypatch, [])
+
+    await queue_consumer.consume_deep_queue()
+
+    assert processed == ["deep_summary"]
+
+
+@pytest.mark.asyncio
+async def test_deep_consumer_respects_hold(monkeypatch):
+    """deep_summary 홀드 시 deep 컨슈머가 claim 안 함."""
+    processed = _fake_consumer_env(monkeypatch, ["deep_summary"])
+
+    await queue_consumer.consume_deep_queue()
+
+    assert processed == []
+
+
 def test_fast_split_invariants():
-    """세 컨슈머 stage 집합 disjoint + embed/chunk 배치 상향 회귀 가드."""
+    """네 컨슈머 stage 집합 disjoint + embed/chunk 배치 상향 + deep split 회귀 가드."""
     main = set(queue_consumer.MAIN_QUEUE_STAGES)
     fast = set(queue_consumer.FAST_QUEUE_STAGES)
     md = set(queue_consumer.MARKDOWN_QUEUE_STAGES)
+    deep = set(queue_consumer.DEEP_QUEUE_STAGES)
     assert not (main & fast) and not (main & md) and not (fast & md)
+    assert not (main & deep) and not (fast & deep) and not (md & deep)
     assert fast == {"embed", "chunk"}
+    assert deep == {"deep_summary"}
+    assert "deep_summary" not in main  # 2026-06-15 split 회귀 가드
     assert queue_consumer.BATCH_SIZE["embed"] >= 10
     assert queue_consumer.BATCH_SIZE["chunk"] >= 10