feat(workers): 맥북 M5 Max 분담 배선 — deep 슬롯 + 보류 시멘틱 + queue_drain CLI

plan ds-macbook-offload-1 P2 (Soft Lock 예외 박제 ds-macbook-offload-exec-20260611.md):
- config ai.models.deep optional 슬롯 (라우터 :8890 경유 qwen-macbook, 부재 시 기존 경로)
- AIClient.call_deep + is_deferrable_error + call_deep_or_defer (자동 cloud/맥미니 폴백 0)
- deep_summary_worker: deep 슬롯 시 맥북 경유 (맥미니 mlx gate 미점유) + 실모델 기록
- StageDeferred 보류 시멘틱: 503/connect/read-timeout(sleep 절단) = attempts 미소모 +
  payload.deferred_until 30분 백오프, doc 쓰기는 완주+파싱 후 단일 커밋 (부분 쓰기 0)
- queue_consumer: claim 에 deferred 필터 + StageDeferred 분기
- workers.queue_drain: 수동 burst-drain CLI (summarize/deep_summary, SKIP LOCKED 단건
  claim, per-item 커밋, 보류 시 run 종료, deep 슬롯 필수 가드)
- tests 20건 + 라우터 경유 Qwen 실응답 fixture 박제 (13.2s 라이브)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-06-11 12:55:16 +09:00
parent 9fb3de6e0a
commit 88e5893041
9 changed files with 507 additions and 17 deletions
+32
View File
@@ -0,0 +1,32 @@
{
"id": "chatcmpl-80cd8ddc-7788-4605-b40e-3975fe7e1326",
"object": "chat.completion",
"created": 1781149952,
"model": "/Users/hyungi/mlx-models/Qwen3.6-27B-8bit",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "\uc81c\uacf5\ub41c \ubb38\uc11c\ub294 \uc555\ub825\uc6a9\uae30 \uac80\uc0ac\uc758 \uae30\uc900\uc774 \ub418\ub294 \uaddc\uc815\uc744 \uba85\uc2dc\ud558\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ud575\uc2ec \ub0b4\uc6a9\uc740 \uc555\ub825\uc6a9\uae30\uc5d0 \ub300\ud55c \ubaa8\ub4e0 \uac80\uc0ac \uc808\ucc28\uc640 \uae30\uc900\uc774 'ASME Section VIII Div 1'\uc774\ub77c\ub294 \uad6d\uc81c\uc801\uc73c\ub85c \uc778\uc815\ubc1b\ub294 \uc555\ub825\uc6a9\uae30 \uc124\uacc4 \ubc0f \uc81c\uc791 \uaddc\uc815\uc5d0 \ub530\ub77c \uc5c4\uaca9\ud558\uac8c \uc218\ud589\ub418\uc5b4\uc57c \ud55c\ub2e4\ub294 \uac83\uc785\ub2c8\ub2e4. \uc774\ub294 \uc548\uc804\uc131\uacfc \uc2e0\ub8b0\uc131\uc744 \ubcf4\uc7a5\ud558\uae30 \uc704\ud55c \ud544\uc218\uc801\uc778 \uc694\uad6c\uc0ac\ud56d\uc73c\ub85c, \ud574\ub2f9 \uaddc\uc815\uc744 \uc900\uc218\ud568\uc73c\ub85c\uc368 \uc555\ub825\uc6a9\uae30\uc758 \uad6c\uc870\uc801 \ubb34\uacb0\uc131\uacfc \uc6b4\uc601 \uc548\uc804\uc131\uc744 \ud655\ubcf4\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4. \ub530\ub77c\uc11c \uad00\ub828 \uc5c5\ubb34 \uc218\ud589 \uc2dc \ubc18\ub4dc\uc2dc \uc774 \uaddc\uc815\uc744 \ucc38\uc870\ud558\uc5ec \uac80\uc0ac\ub97c \uc9c4\ud589\ud574\uc57c \ud569\ub2c8\ub2e4.",
"reasoning": null,
"tool_calls": null,
"tool_call_id": null,
"name": null
},
"logprobs": null
}
],
"usage": {
"prompt_tokens": 44,
"completion_tokens": 118,
"total_tokens": 162,
"prompt_tokens_details": {
"cached_tokens": 0
},
"prompt_tps": 0.0,
"generation_tps": 0.0,
"peak_memory": 29.804702642
}
}
+157
View File
@@ -0,0 +1,157 @@
"""ds-macbook-offload-1 P2-4 — tests for deep-slot routing / deferral (StageDeferred) / drain guards.
No DB required (unit) — AIClient bypasses settings via __new__; the drain guards monkeypatch settings.
Integration (deferral back-off DB write, claim contention) is measured live at the P3-2 E2E gate.
fixture = tests/fixtures/qwen_router_chat_completion.json (captured live on 2026-06-11 —
via router :8890 with model=qwen-macbook, same body shape as the production call, measured 13.2s).
"""
import json
from pathlib import Path
from types import SimpleNamespace
import httpx
import pytest
from ai.client import AIClient, call_deep_or_defer, is_deferrable_error
from models.queue import StageDeferred
# Path to the recorded router chat-completion response, resolved relative to this
# test module; read by test_router_fixture_shape.
FIXTURE = Path(__file__).parent / "fixtures" / "qwen_router_chat_completion.json"
def _client(deep_cfg, primary_cfg):
    """Return an AIClient that does not depend on settings.

    ``__init__`` is bypassed via ``__new__`` and only the ``ai`` attribute is
    injected, carrying the given ``deep`` and ``primary`` slot configs.
    """
    slots = SimpleNamespace(deep=deep_cfg, primary=primary_cfg)
    stub = AIClient.__new__(AIClient)
    stub.ai = slots
    return stub
def _http_status_error(status: int) -> httpx.HTTPStatusError:
    """Build an ``httpx.HTTPStatusError`` whose response carries ``status``.

    The request is a POST to the router chat-completions URL; the same request
    object is attached to both the response and the exception.
    """
    request = httpx.Request("POST", "http://router:8890/v1/chat/completions")
    return httpx.HTTPStatusError(
        f"status {status}",
        request=request,
        response=httpx.Response(status, request=request),
    )
# ─── is_deferrable_error classification ───────────────────────────────────
@pytest.mark.parametrize(
    "exc",
    [
        _http_status_error(503),  # router upstream_cold/editor_busy/warming
        httpx.ConnectError("connection refused"),  # MacBook asleep — cannot connect at all
        httpx.ConnectTimeout("connect timeout"),
        httpx.ReadTimeout("read timeout"),  # cut off by sleep mid-generation
        httpx.ReadError("connection reset"),
        httpx.RemoteProtocolError("server disconnected"),
    ],
)
def test_deferrable_errors(exc):
    """Every parametrized error must be classified as deferrable."""
    verdict = is_deferrable_error(exc)
    assert verdict is True
@pytest.mark.parametrize(
    "exc",
    [
        _http_status_error(400),  # e.g. unknown alias — configuration errors are not deferred
        _http_status_error(500),
        ValueError("parse"),
        RuntimeError("boom"),
    ],
)
def test_non_deferrable_errors(exc):
    """Every parametrized error must be classified as NOT deferrable."""
    verdict = is_deferrable_error(exc)
    assert verdict is False
# ─── call_deep slot selection ─────────────────────────────────────────────
@pytest.mark.asyncio
async def test_call_deep_uses_deep_slot():
    """With a deep slot configured, ``call_deep`` hands that config to ``_request``."""
    seen = {}

    async def fake_request(cfg, prompt, system=None):
        # Record which slot config the client chose to send.
        seen["cfg"] = cfg
        return "ok"

    deep = SimpleNamespace(model="qwen-macbook")
    primary = SimpleNamespace(model="gemma-26b")
    client = _client(deep, primary)
    client._request = fake_request

    reply = await client.call_deep("p")

    assert reply == "ok"
    assert seen["cfg"] is deep
@pytest.mark.asyncio
async def test_call_deep_falls_back_to_primary_when_slot_absent():
    """Slot absent = feature inactive (defensive primary — not a silent downgrade, just the existing path)."""
    seen = {}

    async def fake_request(cfg, prompt, system=None):
        # Record which slot config the client chose to send.
        seen["cfg"] = cfg
        return "ok"

    primary = SimpleNamespace(model="gemma-26b")
    client = _client(None, primary)
    client._request = fake_request

    await client.call_deep("p")

    assert seen["cfg"] is primary
# ─── call_deep_or_defer deferral conversion ───────────────────────────────
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "exc",
    [
        _http_status_error(503),
        httpx.ConnectError("refused"),
        httpx.ReadTimeout("cut mid-generation"),
    ],
)
async def test_defer_conversion(exc):
    """When ``_request`` raises one of these errors, ``call_deep_or_defer`` raises StageDeferred."""

    async def always_fail(cfg, prompt, system=None):
        raise exc

    deep_only = _client(SimpleNamespace(model="qwen-macbook"), None)
    deep_only._request = always_fail

    with pytest.raises(StageDeferred):
        await call_deep_or_defer(deep_only, "p")
@pytest.mark.asyncio
async def test_non_deferrable_propagates():
    """400 / generic errors are not StageDeferred — they propagate to the caller's existing failure path."""

    async def always_fail(cfg, prompt, system=None):
        raise _http_status_error(400)

    deep_only = _client(SimpleNamespace(model="qwen-macbook"), None)
    deep_only._request = always_fail

    with pytest.raises(httpx.HTTPStatusError):
        await call_deep_or_defer(deep_only, "p")
def test_stage_deferred_carries_backoff():
    """A StageDeferred built with only a reason exposes ``retry_after_minutes == 30``."""
    deferred = StageDeferred("macbook_unavailable:ConnectError")
    backoff = deferred.retry_after_minutes
    assert backoff == 30
def test_router_fixture_shape():
    """Pin that the ``_request`` parse path (choices[0].message.content) matches the real router response shape.

    The fixture is decoded explicitly as UTF-8 so the test does not depend on
    the platform's locale default encoding.
    """
    data = json.loads(FIXTURE.read_text(encoding="utf-8"))
    message = data["choices"][0]["message"]
    content = message["content"]
    assert isinstance(content, str) and len(content) > 0
    assert message["role"] == "assistant"
    # The router answers with the alias replaced by the upstream local path,
    # so the model that actually handled the request can be traced.
    assert "Qwen3.6-27B-8bit" in data["model"]
# ─── drain guards (no silent downgrade) ───────────────────────────────────
@pytest.mark.asyncio
async def test_drain_requires_deep_slot(monkeypatch):
    """``drain`` must exit (SystemExit) when settings carry no deep slot."""
    import workers.queue_drain as drain_mod

    settings_without_deep = SimpleNamespace(ai=SimpleNamespace(deep=None))
    monkeypatch.setattr(drain_mod, "settings", settings_without_deep)

    with pytest.raises(SystemExit):
        await drain_mod.drain("summarize", 1)
@pytest.mark.asyncio
async def test_drain_rejects_non_drain_stage(monkeypatch):
    """``drain`` must exit (SystemExit) for the "classify" stage even when a deep slot is set."""
    import workers.queue_drain as drain_mod

    settings_with_deep = SimpleNamespace(ai=SimpleNamespace(deep=object()))
    monkeypatch.setattr(drain_mod, "settings", settings_with_deep)

    with pytest.raises(SystemExit):
        await drain_mod.drain("classify", 1)