feat(worker-pool): Registry-1B Pull 활성화 (auth + worker_jobs + 5 endpoint)
worker-pool-policy §B 1B 영역 완료. 1A scaffold (mig 270~274 + 503 stub) 위에:
- mig 275/276: worker_jobs (status CHECK + user_id=owner) + pending partial index
- create_laptop_worker_bot_token + require_worker_user dependency (voice-memo 동형)
- /internal/worker/{register,heartbeat,claim,result,drain} 5 endpoint 실 구현
- /claim FOR UPDATE SKIP LOCKED + 204 body 0
- /result 소유권 검증 (worker_id 매칭, 404) + failed 재시도 (attempts/max)
- explicit failure 시 request.result 무시 (DB result NULL 유지)
- 테스트 22 항목 7 파일
policy §B.2 5 invariant 보존: voice-memo wrapper 변경 0, drain advisory,
result raw JSONB, ProcessingQueue 무변경, 운영 자동 분기 변경 0.
활용처 (recap context + /jobs/recap + payload 100KB guard) = Registry-1C 영역.
stale recovery / 노트북 client / canonical promote = Notebook-Pilot-1 영역.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from core.auth import (
|
||||
REFRESH_TOKEN_EXPIRE_DAYS,
|
||||
create_access_token,
|
||||
create_laptop_worker_bot_token,
|
||||
create_refresh_token,
|
||||
create_voice_memo_bot_token,
|
||||
decode_token,
|
||||
@@ -124,6 +125,11 @@ async def login(
|
||||
if bot_token is not None:
|
||||
return AccessTokenResponse(access_token=bot_token)
|
||||
|
||||
# PR-Worker-Pool-Registry-1B — laptop-worker-bot 한정 long-expiry token (voice-memo 분기 우선 평가).
|
||||
laptop_bot_token = create_laptop_worker_bot_token(user.username)
|
||||
if laptop_bot_token is not None:
|
||||
return AccessTokenResponse(access_token=laptop_bot_token)
|
||||
|
||||
# refresh token → HttpOnly cookie
|
||||
_set_refresh_cookie(response, create_refresh_token(user.username))
|
||||
|
||||
|
||||
+240
-27
@@ -1,49 +1,262 @@
|
||||
"""PR-Worker-Pool-Registry-1A scaffold: /internal/worker/* 라우트군 503 stub.
|
||||
"""PR-Worker-Pool-Registry-1B: /internal/worker/* 5 endpoint 실 구현.
|
||||
|
||||
worker-pool-policy §8 의 5개 라우트 (register/heartbeat/claim/result/drain) 자리잡기.
|
||||
실 동작 = PR-Worker-Pool-Registry-1B (laptop-worker-bot user + worker_jobs table + recap).
|
||||
worker-pool-policy §B.2 invariant 매핑:
|
||||
- inv 2: drain = heartbeat INSERT only (advisory). claim 거부 = Notebook-Pilot-1.
|
||||
- inv 3: /result result = raw JSONB only. canonical promote 0.
|
||||
- inv 4: ProcessingQueue 무변경 — worker_jobs 별 table.
|
||||
- inv 5: 운영 자동 분기 변경 0 — heartbeat alive 판정 SQL 부재, classify_worker/queue_consumer touch 0.
|
||||
|
||||
1A 시점에는:
|
||||
- 인증 dependency 없음 (503 first response 라 attack surface 0)
|
||||
- Pydantic schema 없음 (1B 활성화 시 추가)
|
||||
- 모든 endpoint = HTTP 503 + detail
|
||||
사용자 review 정정 5개 (2026-05-19):
|
||||
- #1: worker_jobs.user_id = job owner (실 사용자). worker 인증은 worker_id + JWT 별도.
|
||||
- #2: /result 소유권 검증 (WHERE id AND worker_id AND status='processing'). 매칭 0건 → 404.
|
||||
- #3: explicit failed 재시도 (attempts<max → pending 복귀, attempts>=max → final failed).
|
||||
- #4: /claim 204 = Response(status_code=204) body 0.
|
||||
- #5: mig 275 status CHECK ('pending','processing','completed','failed').
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from datetime import datetime, timezone
|
||||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from core.auth import require_worker_user
|
||||
from core.database import get_session
|
||||
from models.worker_pool import WorkerCapability, WorkerHeartbeat, WorkerJob
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _stub_503(endpoint: str) -> None:
    """Reject a request to a not-yet-activated /internal/worker route.

    Args:
        endpoint: Route name (e.g. ``"register"``) interpolated into the
            error detail.

    Raises:
        HTTPException: Always, with status 503. The function never returns.
    """
    message = (
        f"/internal/worker/{endpoint} disabled "
        "(Registry-1A stub; activates in Registry-1B)"
    )
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail=message,
    )
|
||||
# ─── Pydantic schemas ───
|
||||
|
||||
|
||||
class WorkerRegisterRequest(BaseModel):
    """Request body for POST /register.

    Every field except ``worker_id`` is rewritten in worker_capabilities each
    time the same worker registers again.
    """

    # Identity of the worker; the upsert conflict key.
    worker_id: str
    device_label: str
    worker_class: str
    tier: str
    # Optional descriptive lists; omitted means empty.
    capabilities: list[str] = []
    models_loaded: list[str] = []
    endpoint: str | None = None
|
||||
|
||||
|
||||
class WorkerHeartbeatRequest(BaseModel):
    """Request body for POST /heartbeat; stored as one worker_heartbeats row."""

    worker_id: str
    # Expected values: starting / available / busy / draining.
    # The schema itself accepts any string.
    status: str
    current_job_id: int | None = None
    battery: str | None = None
    thermal: str | None = None
    # Free-form extra data persisted as-is.
    raw_payload: dict[str, Any] = {}
|
||||
|
||||
|
||||
class WorkerClaimRequest(BaseModel):
    """Request body for POST /claim."""

    # Worker that will own the job if one is claimed.
    worker_id: str
    # Only pending jobs of this type are considered.
    job_type: str
|
||||
|
||||
|
||||
class WorkerClaimResponse(BaseModel):
    """200 response of POST /claim: the job handed to the worker."""

    id: int
    job_type: str
    payload: dict[str, Any]
    # Attempt counter after this claim was counted.
    attempts: int
|
||||
|
||||
|
||||
class WorkerResultRequest(BaseModel):
    """Request body for POST /result."""

    job_id: int
    # Review correction #2: used for the ownership check against the job row.
    worker_id: str
    # "completed" or "failed"; anything else is rejected by the handler.
    status: str
    # Stored only when status is "completed".
    result: dict[str, Any] | None = None
    error_message: str | None = None
|
||||
|
||||
|
||||
class WorkerDrainRequest(BaseModel):
    """Request body for POST /drain."""

    worker_id: str
    # Optional human-readable reason, recorded in the heartbeat payload.
    reason: str | None = None
|
||||
|
||||
|
||||
# ─── 엔드포인트 ───
|
||||
|
||||
|
||||
@router.post("/register")
async def register(
    body: WorkerRegisterRequest,
    user: Annotated[Any, Depends(require_worker_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Upsert the worker's row in worker_capabilities.

    The first call for a ``worker_id`` inserts a row owned by the
    authenticated worker user. A later call with the same ``worker_id``
    refreshes the descriptive columns and ``last_registered_at`` while leaving
    ``user_id`` and ``created_at`` as originally stored.

    Returns:
        ``{"ok": True, "worker_id": <worker_id>}``.
    """
    registered_at = datetime.now(timezone.utc)

    # Columns written on both the insert path and the conflict-update path.
    refreshable = {
        "device_label": body.device_label,
        "worker_class": body.worker_class,
        "tier": body.tier,
        "capabilities": body.capabilities,
        "models_loaded": body.models_loaded,
        "endpoint": body.endpoint,
        "last_registered_at": registered_at,
    }
    upsert = (
        pg_insert(WorkerCapability)
        .values(
            worker_id=body.worker_id,
            user_id=user.id,
            created_at=registered_at,
            **refreshable,
        )
        .on_conflict_do_update(index_elements=["worker_id"], set_=refreshable)
    )

    await session.execute(upsert)
    await session.commit()
    return {"ok": True, "worker_id": body.worker_id}
|
||||
|
||||
|
||||
@router.post("/heartbeat")
async def heartbeat(
    body: WorkerHeartbeatRequest,
    user: Annotated[Any, Depends(require_worker_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Append one row to worker_heartbeats and acknowledge.

    Policy invariant 5: no liveness decision is made here. The handler only
    stores what the worker reported and returns ``{"ok": True}``.
    """
    session.add(
        WorkerHeartbeat(
            worker_id=body.worker_id,
            status=body.status,
            current_job_id=body.current_job_id,
            battery=body.battery,
            thermal=body.thermal,
            raw_payload=body.raw_payload,
        )
    )
    await session.commit()
    return {"ok": True}
|
||||
|
||||
|
||||
@router.post(
    "/claim",
    responses={
        200: {"model": WorkerClaimResponse},
        204: {"description": "queue empty"},
    },
)
async def claim(
    body: WorkerClaimRequest,
    user: Annotated[Any, Depends(require_worker_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Claim the oldest pending job of the requested type for this worker.

    The candidate row is selected with ``FOR UPDATE SKIP LOCKED``, so rows
    already locked by a concurrent claimer are skipped instead of waited on.

    Returns:
        A ``WorkerClaimResponse`` for the claimed job, or an empty HTTP 204
        ``Response`` when no pending job of that type is available (review
        correction #4: no body, no ``WorkerClaimResponse | None``).
    """
    now = datetime.now(timezone.utc)
    stmt = (
        select(WorkerJob)
        .where(WorkerJob.status == "pending", WorkerJob.job_type == body.job_type)
        .order_by(WorkerJob.created_at)
        .limit(1)
        .with_for_update(skip_locked=True)
    )
    job = (await session.execute(stmt)).scalar_one_or_none()
    if job is None:
        # End the transaction opened by the locking SELECT.
        await session.commit()
        return Response(status_code=204)

    job.status = "processing"
    job.worker_id = body.worker_id
    job.claimed_at = now
    job.attempts = job.attempts + 1

    # Build the response BEFORE committing. If the session uses SQLAlchemy's
    # default expire_on_commit=True (get_session is not visible here), every
    # attribute is expired by commit() and reading it afterwards would require
    # implicit IO, which fails on an AsyncSession.
    claimed = WorkerClaimResponse(
        id=job.id,
        job_type=job.job_type,
        payload=job.payload,
        attempts=job.attempts,
    )
    await session.commit()
    return claimed
|
||||
|
||||
|
||||
@router.post("/result")
async def result(
    body: WorkerResultRequest,
    user: Annotated[Any, Depends(require_worker_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Record the outcome of a job the calling worker is processing.

    Ownership (review correction #2): the job must match ``job_id``, be
    assigned to ``worker_id`` and be in status ``processing``.

    Outcomes:
        completed: status becomes ``completed``; ``result`` is stored as raw
            JSONB (invariant 3), ``completed_at`` is set and ``error_message``
            cleared.
        failed (review correction #3): ``error_message`` is stored and the
            request's ``result`` is ignored. If ``attempts < max_attempts``
            the job returns to ``pending`` with ``worker_id``, ``claimed_at``
            and ``completed_at`` cleared; otherwise it becomes a final
            ``failed`` with ``completed_at`` set.

    Returns:
        ``{"ok": True, "status": <new status>, "attempts": <attempts>}``.

    Raises:
        HTTPException: 400 when ``status`` is neither ``completed`` nor
            ``failed``; 404 when no job matches the ownership check.
    """
    if body.status not in ("completed", "failed"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="status must be 'completed' or 'failed'",
        )

    # Lock the row so the ownership check and the state transition are atomic.
    # Without the lock, two concurrent submissions for the same job could both
    # observe status='processing' and both write their outcome.
    stmt = (
        select(WorkerJob)
        .where(
            WorkerJob.id == body.job_id,
            WorkerJob.worker_id == body.worker_id,
            WorkerJob.status == "processing",
        )
        .with_for_update()
    )
    job = (await session.execute(stmt)).scalar_one_or_none()
    if job is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="job not found or not owned by this worker (or not in processing)",
        )

    now = datetime.now(timezone.utc)
    if body.status == "completed":
        job.status = "completed"
        job.result = body.result  # raw JSONB only (invariant 3)
        job.completed_at = now
        job.error_message = None
    else:
        job.error_message = body.error_message
        # The result column is deliberately left untouched on failure.
        if job.attempts < job.max_attempts:
            # Retry budget left: hand the job back to the queue.
            job.status = "pending"
            job.worker_id = None
            job.claimed_at = None
            job.completed_at = None
        else:
            job.status = "failed"
            job.completed_at = now

    # Capture the response before commit: with expire_on_commit=True the
    # attributes would be expired afterwards and unreadable on an AsyncSession
    # without implicit IO.
    outcome = {"ok": True, "status": job.status, "attempts": job.attempts}
    await session.commit()
    return outcome
|
||||
|
||||
|
||||
@router.post("/drain")
async def drain(
    body: WorkerDrainRequest,
    user: Annotated[Any, Depends(require_worker_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Record a drain notice as a heartbeat row with status ``draining``.

    Policy invariant 2: this is advisory/audit only. Nothing in this handler
    prevents the worker from claiming further jobs; claim refusal belongs to
    Notebook-Pilot-1.
    """
    # An absent or empty reason leaves the stored payload empty.
    details: dict[str, Any] = {"reason": body.reason} if body.reason else {}
    session.add(
        WorkerHeartbeat(
            worker_id=body.worker_id,
            status="draining",
            raw_payload=details,
        )
    )
    await session.commit()
    return {"ok": True}
|
||||
|
||||
@@ -51,6 +51,17 @@ def create_voice_memo_bot_token(username: str) -> str | None:
|
||||
return create_access_token(username, expires_minutes=expire_days * 24 * 60)
|
||||
|
||||
|
||||
def create_laptop_worker_bot_token(username: str) -> str | None:
|
||||
# PR-Worker-Pool-Registry-1B — laptop-worker-bot 계정 한정 long-expiry token (voice-memo 동형).
|
||||
if os.getenv("LAPTOP_WORKER_BOT_TOKEN_ENABLED", "false").lower() != "true":
|
||||
return None
|
||||
bot_username = os.getenv("LAPTOP_WORKER_BOT_USERNAME", "laptop-worker-bot")
|
||||
if username != bot_username:
|
||||
return None
|
||||
expire_days = int(os.getenv("LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS", "365"))
|
||||
return create_access_token(username, expires_minutes=expire_days * 24 * 60)
|
||||
|
||||
|
||||
def create_refresh_token(subject: str) -> str:
|
||||
now = datetime.now(timezone.utc)
|
||||
expire = now + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
|
||||
@@ -129,3 +140,21 @@ async def require_admin(
|
||||
detail="관리자 권한 필요",
|
||||
)
|
||||
return user
|
||||
|
||||
|
||||
async def require_worker_user(
    credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Authenticate a caller of /internal/worker/* (PR-Worker-Pool-Registry-1B).

    Resolves the current user from the bearer credentials, then admits only
    the account named by ``LAPTOP_WORKER_BOT_USERNAME`` (default
    ``laptop-worker-bot``).

    Returns:
        The authenticated worker user.

    Raises:
        HTTPException: 403 for any other authenticated user, including the
            voice-memo bot and regular users.
    """
    user = await get_current_user(credentials, session)
    allowed_username = os.getenv("LAPTOP_WORKER_BOT_USERNAME", "laptop-worker-bot")
    if user.username == allowed_username:
        return user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="worker user only",
    )
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
"""worker_capabilities + worker_heartbeats 테이블 ORM (PR-Worker-Pool-Registry-1A).
|
||||
"""worker_capabilities + worker_heartbeats + worker_jobs 테이블 ORM.
|
||||
|
||||
1A 단계: schema only. 라우트 5개 (register/heartbeat/claim/result/drain) 모두 503 stub.
|
||||
실 활성화 + WorkerJob 모델은 1B 영역. 본 모듈 import 자체는 init_db 가 mig 270~274 적용
|
||||
후 안전 (테이블 존재 보장).
|
||||
1A scaffold (mig 270~274) + 1B 활성화 (mig 275~276). 1B = WorkerJob 신규 + 5 endpoint 실 구현.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, Text
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, SmallInteger, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
@@ -50,3 +48,29 @@ class WorkerHeartbeat(Base):
|
||||
battery: Mapped[str | None] = mapped_column(Text)
|
||||
thermal: Mapped[str | None] = mapped_column(Text)
|
||||
raw_payload: Mapped[dict] = mapped_column(JSONB, default=dict, nullable=False)
|
||||
|
||||
|
||||
class WorkerJob(Base):
    """ORM mapping for the worker_jobs table (one row per queued job)."""

    # user_id is the job owner (a real user), not the worker bot account.
    # Worker authentication is handled separately via worker_id + JWT.
    # result holds raw JSONB only (policy §B.2 invariant 3); canonical
    # promotion is Notebook-Pilot-1 scope.
    __tablename__ = "worker_jobs"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("users.id"), nullable=False)
    job_type: Mapped[str] = mapped_column(Text, nullable=False)
    # New rows start as "pending".
    status: Mapped[str] = mapped_column(Text, nullable=False, default="pending")
    # NULL while no worker holds the job.
    worker_id: Mapped[str | None] = mapped_column(Text, ForeignKey("worker_capabilities.worker_id"))
    payload: Mapped[dict] = mapped_column(JSONB, default=dict, nullable=False)
    result: Mapped[dict | None] = mapped_column(JSONB)
    error_message: Mapped[str | None] = mapped_column(Text)
    # Retry accounting: attempts starts at 0, max_attempts defaults to 3.
    attempts: Mapped[int] = mapped_column(SmallInteger, default=0, nullable=False)
    max_attempts: Mapped[int] = mapped_column(SmallInteger, default=3, nullable=False)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.now, nullable=False)
    claimed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
Reference in New Issue
Block a user