feat: NanoClaude Phase 2 — EXAONE→Gemma 파이프라인, 큐, 상태 API
- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
57
nanoclaude/services/job_queue.py
Normal file
57
nanoclaude/services/job_queue.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""JobQueue — Semaphore 기반 동시성 제한."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from services import worker
|
||||
from services.job_manager import Job, job_manager
|
||||
from services.state_stream import state_stream
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JobQueue:
|
||||
def __init__(self, max_concurrent: int = 3) -> None:
|
||||
self._semaphore = asyncio.Semaphore(max_concurrent)
|
||||
self._waiting: list[str] = [] # 대기 중 job_id (순서 보장)
|
||||
self._active: set[str] = set()
|
||||
|
||||
async def submit(self, job: Job) -> asyncio.Task:
|
||||
task = asyncio.create_task(self._run_with_semaphore(job))
|
||||
job_manager.attach_task(job.id, task)
|
||||
return task
|
||||
|
||||
async def _run_with_semaphore(self, job: Job) -> None:
|
||||
self._waiting.append(job.id)
|
||||
pos = self.position(job.id)
|
||||
if pos and pos > 0:
|
||||
await state_stream.push(job.id, "queued", {"position": pos})
|
||||
|
||||
try:
|
||||
async with self._semaphore:
|
||||
self._waiting.remove(job.id)
|
||||
self._active.add(job.id)
|
||||
await worker.run(job)
|
||||
finally:
|
||||
self._active.discard(job.id)
|
||||
|
||||
def position(self, job_id: str) -> int | None:
|
||||
try:
|
||||
return self._waiting.index(job_id) + 1
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
@property
|
||||
def stats(self) -> dict:
|
||||
return {"pending": len(self._waiting), "active": len(self._active)}
|
||||
|
||||
|
||||
job_queue: JobQueue | None = None
|
||||
|
||||
|
||||
def init_queue(max_concurrent: int = 3) -> JobQueue:
|
||||
global job_queue
|
||||
job_queue = JobQueue(max_concurrent)
|
||||
return job_queue
|
||||
Reference in New Issue
Block a user