- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
58 lines · 1.6 KiB · Python
"""JobQueue — Semaphore 기반 동시성 제한."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
|
|
from services import worker
|
|
from services.job_manager import Job, job_manager
|
|
from services.state_stream import state_stream
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JobQueue:
    """Bound the number of concurrently running jobs with a semaphore.

    Jobs beyond ``max_concurrent`` wait in FIFO submission order; a job's
    queue position is published to the client via ``state_stream`` when it
    has to wait.
    """

    def __init__(self, max_concurrent: int = 3) -> None:
        self._semaphore = asyncio.Semaphore(max_concurrent)
        # Waiting job_ids in submission order (position() relies on order).
        self._waiting: list[str] = []
        # job_ids currently holding a semaphore slot.
        self._active: set[str] = set()

    async def submit(self, job: Job) -> asyncio.Task:
        """Schedule *job* for execution and return the driving task.

        The task is registered with ``job_manager`` so it can be cancelled
        externally (e.g. by a client disconnect).
        """
        task = asyncio.create_task(self._run_with_semaphore(job))
        job_manager.attach_task(job.id, task)
        return task

    async def _run_with_semaphore(self, job: Job) -> None:
        """Wait for a free slot, then hand *job* to the worker."""
        self._waiting.append(job.id)
        pos = self.position(job.id)
        if pos and pos > 0:
            await state_stream.push(job.id, "queued", {"position": pos})

        try:
            async with self._semaphore:
                self._waiting.remove(job.id)
                self._active.add(job.id)
                await worker.run(job)
        finally:
            # BUGFIX: if this task is cancelled while still blocked on the
            # semaphore, the original code never removed the job from
            # ``_waiting``, permanently skewing position() and stats for
            # every later job. Clean up both collections unconditionally.
            if job.id in self._waiting:
                self._waiting.remove(job.id)
            self._active.discard(job.id)

    def position(self, job_id: str) -> int | None:
        """Return the 1-based queue position of *job_id*, or None if not waiting."""
        try:
            return self._waiting.index(job_id) + 1
        except ValueError:
            return None

    @property
    def stats(self) -> dict:
        """Snapshot of queue depth: ``pending`` (waiting) and ``active`` counts."""
        return {"pending": len(self._waiting), "active": len(self._active)}
|
|
|
|
|
|
# Process-wide singleton; stays None until init_queue() runs at startup.
job_queue: JobQueue | None = None


def init_queue(max_concurrent: int = 3) -> JobQueue:
    """Create the module-level :class:`JobQueue` singleton and return it.

    Args:
        max_concurrent: Maximum number of jobs allowed to run at once.

    Returns:
        The freshly created queue, also bound to the global ``job_queue``.
    """
    global job_queue
    job_queue = JobQueue(max_concurrent)
    return job_queue