Files
gpu-services/nanoclaude/services/job_queue.py
Hyungi Ahn c4c32170f1 feat: NanoClaude Phase 2 — EXAONE→Gemma 파이프라인, 큐, 상태 API
- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 12:04:15 +09:00

58 lines
1.6 KiB
Python

"""JobQueue — Semaphore 기반 동시성 제한."""
from __future__ import annotations
import asyncio
import logging
from services import worker
from services.job_manager import Job, job_manager
from services.state_stream import state_stream
logger = logging.getLogger(__name__)
class JobQueue:
    """Limit how many jobs run at the same time with an asyncio semaphore.

    Jobs that cannot start immediately wait for a free slot. Their ids are
    kept in the order they started waiting so that a 1-based queue position
    can be reported through :meth:`position`.
    """

    def __init__(self, max_concurrent: int = 3) -> None:
        """Create a queue that lets at most *max_concurrent* jobs run at once."""
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._waiting: list[str] = []  # job_ids waiting for a slot (order preserved)
        self._active: set[str] = set()  # job_ids currently holding a slot

    async def submit(self, job: Job) -> asyncio.Task:
        """Schedule *job* as a background task and return that task.

        The task is also handed to ``job_manager.attach_task`` under the
        job's id before it is returned.
        """
        task = asyncio.create_task(self._run_with_semaphore(job))
        job_manager.attach_task(job.id, task)
        return task

    async def _run_with_semaphore(self, job: Job) -> None:
        """Wait for a free slot, then run *job* via ``worker.run``.

        A ``"queued"`` event carrying the job's position is pushed to
        ``state_stream`` before waiting. The job's bookkeeping entries are
        always removed on exit, whether the job finished, raised, or was
        cancelled (including while it was still waiting for a slot).
        """
        self._waiting.append(job.id)
        still_waiting = True
        try:
            pos = self.position(job.id)
            if pos:
                await state_stream.push(job.id, "queued", {"position": pos})
            async with self._semaphore:
                self._waiting.remove(job.id)
                still_waiting = False
                self._active.add(job.id)
                await worker.run(job)
        finally:
            self._active.discard(job.id)
            # If we never got a slot (cancelled or failed while queued), the
            # id is still in the waiting list; drop it so stats and the
            # positions of later jobs stay accurate.
            if still_waiting:
                self._waiting.remove(job.id)

    def position(self, job_id: str) -> int | None:
        """Return the 1-based waiting position of *job_id*, or None if not waiting."""
        try:
            return self._waiting.index(job_id) + 1
        except ValueError:
            return None

    @property
    def stats(self) -> dict:
        """Return the number of waiting (``pending``) and running (``active``) jobs."""
        return {"pending": len(self._waiting), "active": len(self._active)}
# Module-level queue instance; remains None until init_queue() assigns it.
job_queue: JobQueue | None = None
def init_queue(max_concurrent: int = 3) -> JobQueue:
    """Build a new JobQueue, store it in the module-level ``job_queue``, and return it.

    Each call replaces whatever instance ``job_queue`` previously referenced.
    """
    global job_queue
    created = JobQueue(max_concurrent)
    job_queue = created
    return created