feat: NanoClaude Phase 2 — EXAONE→Gemma 파이프라인, 큐, 상태 API

- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 12:04:15 +09:00
parent 8c41a5dead
commit c4c32170f1
14 changed files with 495 additions and 141 deletions

View File

@@ -1,15 +1,13 @@
"""Chat router — POST /chat, GET /chat/{job_id}/stream, POST /chat/{job_id}/cancel."""
"""Chat router — POST /chat, GET /chat/{job_id}/stream, GET /chat/{job_id}/status, POST /chat/{job_id}/cancel."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
from models.schemas import CancelResponse, ChatRequest, ChatResponse
from services import worker
from models.schemas import CancelResponse, ChatRequest, ChatResponse, JobStatusResponse
from services.job_manager import job_manager
from services.job_queue import job_queue
from services.state_stream import state_stream
router = APIRouter(tags=["chat"])
@@ -17,13 +15,10 @@ router = APIRouter(tags=["chat"])
@router.post("/chat", response_model=ChatResponse)
async def create_chat(body: ChatRequest):
    """Return a job_id immediately (ACK) and hand the job to the queue.

    Args:
        body: Incoming chat request; its ``message`` is used to create the job.

    Returns:
        ChatResponse carrying the id of the newly created job.
    """
    job = job_manager.create(body.message)
    state_stream.create(job.id)
    # Dispatch exactly once, through the queue. Also starting worker.run(job)
    # directly here would schedule the same job a second time, outside the
    # queue (NOTE(review): and presumably outside its concurrency limit).
    await job_queue.submit(job)
    return ChatResponse(job_id=job.id)
@@ -52,6 +47,22 @@ async def _stream_with_cleanup(job_id: str):
state_stream.cleanup(job_id)
@router.get("/chat/{job_id}/status", response_model=JobStatusResponse)
async def job_status(job_id: str):
    """Report a job's current state without opening an SSE stream.

    Args:
        job_id: Identifier of the job to look up via ``job_manager``.

    Returns:
        JobStatusResponse built from the job's id, status, creation time and
        pipeline, plus its queue position (``None`` when ``job_queue`` is
        falsy).

    Raises:
        HTTPException: 404 when ``job_manager`` yields no job for ``job_id``.
    """
    found = job_manager.get(job_id)
    if found:
        # Fields are gathered in the same order the response declares them.
        fields = {
            "job_id": found.id,
            "status": found.status,
            "created_at": found.created_at,
            "pipeline": found.pipeline,
            "queue_position": job_queue.position(found.id) if job_queue else None,
        }
        return JobStatusResponse(**fields)
    raise HTTPException(status_code=404, detail="Job not found")
@router.post("/chat/{job_id}/cancel", response_model=CancelResponse)
async def cancel_chat(job_id: str):
"""진행 중인 job 취소."""
@@ -59,3 +70,11 @@ async def cancel_chat(job_id: str):
if not success:
raise HTTPException(status_code=404, detail="Job not found or already finished")
return CancelResponse(status="cancelled")
@router.get("/queue/stats")
async def queue_stats():
    """Return queue statistics.

    Returns:
        ``job_queue.stats`` when ``job_queue`` is truthy; otherwise a zeroed
        placeholder with ``pending`` and ``active`` counts.
    """
    if not job_queue:
        # No usable queue: report an empty snapshot instead of failing.
        return {"pending": 0, "active": 0}
    return job_queue.stats