feat: NanoClaude Phase 2 — EXAONE→Gemma 파이프라인, 큐, 상태 API
- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,15 +1,13 @@
|
||||
"""Chat router — POST /chat, GET /chat/{job_id}/stream, POST /chat/{job_id}/cancel."""
|
||||
"""Chat router — POST /chat, GET /chat/{job_id}/stream, GET /chat/{job_id}/status, POST /chat/{job_id}/cancel."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from models.schemas import CancelResponse, ChatRequest, ChatResponse
|
||||
from services import worker
|
||||
from models.schemas import CancelResponse, ChatRequest, ChatResponse, JobStatusResponse
|
||||
from services.job_manager import job_manager
|
||||
from services.job_queue import job_queue
|
||||
from services.state_stream import state_stream
|
||||
|
||||
router = APIRouter(tags=["chat"])
|
||||
@@ -17,13 +15,10 @@ router = APIRouter(tags=["chat"])
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
|
||||
async def create_chat(body: ChatRequest):
|
||||
"""job_id 즉시 반환 (ACK). 백그라운드에서 EXAONE 처리 시작."""
|
||||
"""job_id 즉시 반환 (ACK). 백그라운드에서 파이프라인 처리 시작."""
|
||||
job = job_manager.create(body.message)
|
||||
state_stream.create(job.id)
|
||||
|
||||
task = asyncio.create_task(worker.run(job))
|
||||
job_manager.attach_task(job.id, task)
|
||||
|
||||
await job_queue.submit(job)
|
||||
return ChatResponse(job_id=job.id)
|
||||
|
||||
|
||||
@@ -52,6 +47,22 @@ async def _stream_with_cleanup(job_id: str):
|
||||
state_stream.cleanup(job_id)
|
||||
|
||||
|
||||
@router.get("/chat/{job_id}/status", response_model=JobStatusResponse)
async def job_status(job_id: str):
    """Return a snapshot of a job's state without opening an SSE stream.

    The job is looked up through ``job_manager.get``. A falsy lookup
    result is reported as HTTP 404 ("Job not found"). Otherwise the
    response carries the job's id, status, creation time and pipeline,
    plus its queue position taken from ``job_queue.position`` (``None``
    when ``job_queue`` is falsy).
    """
    record = job_manager.get(job_id)

    # Guard clause: nothing to report for an unknown job id.
    if not record:
        raise HTTPException(status_code=404, detail="Job not found")

    # Collect the response fields in the same order the response model
    # receives them, resolving the queue position last.
    payload = {
        "job_id": record.id,
        "status": record.status,
        "created_at": record.created_at,
        "pipeline": record.pipeline,
    }
    if job_queue:
        payload["queue_position"] = job_queue.position(record.id)
    else:
        payload["queue_position"] = None

    return JobStatusResponse(**payload)
|
||||
|
||||
|
||||
@router.post("/chat/{job_id}/cancel", response_model=CancelResponse)
|
||||
async def cancel_chat(job_id: str):
|
||||
"""진행 중인 job 취소."""
|
||||
@@ -59,3 +70,11 @@ async def cancel_chat(job_id: str):
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Job not found or already finished")
|
||||
return CancelResponse(status="cancelled")
|
||||
|
||||
|
||||
@router.get("/queue/stats")
async def queue_stats():
    """Report queue statistics.

    Returns ``job_queue.stats`` when ``job_queue`` is truthy; otherwise
    falls back to a zeroed placeholder with "pending" and "active" counts.
    """
    # Guard clause: no usable queue, so answer with the zeroed placeholder.
    if not job_queue:
        return {"pending": 0, "active": 0}

    return job_queue.stats
|
||||
|
||||
Reference in New Issue
Block a user