feat: NanoClaude Phase 2 — EXAONE→Gemma 파이프라인, 큐, 상태 API
- ModelAdapter: 범용 OpenAI-compat 어댑터 (stream/complete/health)
- BackendRegistry: rewriter(EXAONE) + reasoner(Gemma4) 헬스체크 루프
- 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (SSE rewrite 이벤트 노출)
- Fallback: 맥미니 다운 시 EXAONE 단독 모드, stream 중간 실패 시 자동 전환
- Cancel-safe: rewrite 전/후, streaming loop 내, fallback 경로 모두 체크
- Rewrite heartbeat: complete_chat 대기 중 2초 간격 processing 이벤트
- JobQueue: Semaphore(3) 기반 동시성 제한, 정확한 queue position
- GET /chat/{job_id}/status, GET /queue/stats 엔드포인트
- DB: rewrite_model, reasoning_model, rewritten_message 컬럼 추가
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Worker — background task that drives EXAONE call and pushes SSE events."""
|
||||
"""Worker — 2단계 파이프라인: EXAONE rewrite → Gemma reasoning (cancel-safe + fallback)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -9,23 +9,66 @@ from time import time
|
||||
from config import settings
|
||||
from db.database import log_completion, log_request
|
||||
from models.schemas import JobStatus
|
||||
from services.exaone_adapter import stream_chat
|
||||
from services.backend_registry import backend_registry
|
||||
from services.job_manager import Job, job_manager
|
||||
from services.state_stream import state_stream
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Avoid client-visible silence: emit a "processing" heartbeat every 3-5 seconds.
|
||||
HEARTBEAT_INTERVAL = 4.0
|
||||
REWRITE_HEARTBEAT = 2.0
|
||||
MAX_REWRITE_LENGTH = 1000
|
||||
|
||||
|
||||
async def _complete_with_heartbeat(adapter, message: str, job_id: str) -> str:
    """Run ``adapter.complete_chat(message)`` while emitting heartbeat events.

    While the (non-streaming) rewrite call is pending, a "processing" event is
    pushed to ``state_stream`` for ``job_id`` every ``REWRITE_HEARTBEAT``
    seconds so the client does not perceive the job as stalled.

    Args:
        adapter: Backend adapter exposing an awaitable ``complete_chat(message)``.
        message: The user message to send to the backend.
        job_id: Identifier of the job whose event stream receives heartbeats.

    Returns:
        Whatever ``complete_chat`` returned, or ``""`` if the backend call was
        cancelled on its own.

    Raises:
        Exception: Any exception raised by ``complete_chat`` is re-raised.
    """
    task = asyncio.ensure_future(adapter.complete_chat(message))
    try:
        while True:
            # Wake up as soon as the call finishes instead of always sleeping a
            # full interval; a fast rewrite is returned without extra latency.
            done, _pending = await asyncio.wait({task}, timeout=REWRITE_HEARTBEAT)
            if done:
                break
            await state_stream.push(job_id, "processing", {"message": "질문을 재구성하고 있습니다..."})
    finally:
        # If we leave early (caller cancelled, push failed), do not leave the
        # backend call running unattended in the background.
        if not task.done():
            task.cancel()

    if task.cancelled():
        # The backend call itself was cancelled: report an empty rewrite.
        return ""
    return task.result()
|
||||
|
||||
|
||||
async def _stream_with_cancel(adapter, message: str, job: Job, collected: list[str]) -> bool:
    """Stream a backend response to the client, honouring job cancellation.

    Each chunk yielded by ``adapter.stream_chat(message)`` is appended to
    ``collected`` and pushed to ``state_stream`` as a "result" event. After a
    chunk, if at least ``HEARTBEAT_INTERVAL`` seconds have passed since the
    last heartbeat, a "processing" event is pushed as well.

    Args:
        adapter: Backend adapter exposing an async-iterable ``stream_chat(message)``.
        message: Prompt to send to the backend.
        job: Job being served; its ``status`` is checked before every chunk.
        collected: Output list, mutated in place with the received chunks.

    Returns:
        True if the stream ran to completion, False if the job was found to be
        cancelled (the chunk that triggered the check is discarded).

    Raises:
        Exception: Errors raised by the backend stream or by ``state_stream.push``
            propagate to the caller.
    """
    loop = asyncio.get_running_loop()
    last_heartbeat = loop.time()

    stream = adapter.stream_chat(message)
    try:
        async for chunk in stream:
            if job.status == JobStatus.cancelled:
                return False
            collected.append(chunk)
            await state_stream.push(job.id, "result", {"content": chunk})

            now = loop.time()
            if now - last_heartbeat >= HEARTBEAT_INTERVAL:
                await state_stream.push(job.id, "processing", {"message": "응답 생성 중..."})
                last_heartbeat = now
    finally:
        # Leaving the loop early (cancel or error) would otherwise keep the
        # backend stream open until garbage collection; close it now when the
        # iterator supports it. This is a no-op for an already exhausted stream.
        aclose = getattr(stream, "aclose", None)
        if aclose is not None:
            await aclose()

    return True
|
||||
|
||||
|
||||
async def run(job: Job) -> None:
|
||||
"""EXAONE 호출 → SSE 이벤트 발행."""
|
||||
"""EXAONE rewrite → Gemma reasoning 파이프라인 (fallback + cancel-safe)."""
|
||||
start_time = time()
|
||||
rewrite_model = None
|
||||
reasoning_model = None
|
||||
rewritten_message = ""
|
||||
|
||||
# DB 로깅: 요청 기록
|
||||
try:
|
||||
await log_request(job.id, job.message, settings.exaone_model, job.created_at)
|
||||
await log_request(job.id, job.message, "pipeline", job.created_at)
|
||||
except Exception:
|
||||
logger.warning("Failed to log request for job %s", job.id, exc_info=True)
|
||||
|
||||
@@ -34,49 +77,91 @@ async def run(job: Job) -> None:
|
||||
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
|
||||
job_manager.set_status(job.id, JobStatus.processing)
|
||||
|
||||
# --- Processing + Streaming ---
|
||||
await state_stream.push(job.id, "processing", {"message": "EXAONE 모델이 응답을 생성하고 있습니다..."})
|
||||
# --- Cancel 체크 #1 ---
|
||||
if job.status == JobStatus.cancelled:
|
||||
return
|
||||
|
||||
use_pipeline = settings.pipeline_enabled and backend_registry.is_healthy("reasoner")
|
||||
collected: list[str] = []
|
||||
last_heartbeat = asyncio.get_event_loop().time()
|
||||
|
||||
async for chunk in stream_chat(job.message):
|
||||
if not use_pipeline:
|
||||
# === EXAONE 단독 모드 (Phase 1 fallback) ===
|
||||
rewrite_model = backend_registry.rewriter.model
|
||||
await state_stream.push(job.id, "processing", {"message": "EXAONE 모델이 응답을 생성하고 있습니다..."})
|
||||
|
||||
ok = await _stream_with_cancel(backend_registry.rewriter, job.message, job, collected)
|
||||
if not ok:
|
||||
return
|
||||
else:
|
||||
# === 파이프라인 모드: EXAONE rewrite → Gemma reasoning ===
|
||||
rewrite_model = backend_registry.rewriter.model
|
||||
reasoning_model = backend_registry.reasoner.model
|
||||
|
||||
# --- Rewrite ---
|
||||
await state_stream.push(job.id, "processing", {"message": "질문을 재구성하고 있습니다..."})
|
||||
rewrite_start = time()
|
||||
|
||||
try:
|
||||
rewritten_message = await _complete_with_heartbeat(
|
||||
backend_registry.rewriter, job.message, job.id
|
||||
)
|
||||
rewritten_message = rewritten_message[:MAX_REWRITE_LENGTH]
|
||||
except Exception:
|
||||
logger.warning("Rewrite failed for job %s, using original message", job.id)
|
||||
rewritten_message = job.message
|
||||
|
||||
rewrite_latency = (time() - rewrite_start) * 1000
|
||||
job.rewritten_message = rewritten_message
|
||||
|
||||
# --- Rewrite 결과 SSE 노출 ---
|
||||
await state_stream.push(job.id, "rewrite", {"content": rewritten_message})
|
||||
|
||||
# --- Cancel 체크 #2 ---
|
||||
if job.status == JobStatus.cancelled:
|
||||
logger.info("Job %s cancelled during streaming", job.id)
|
||||
await state_stream.push(job.id, "error", {"message": "작업이 취소되었습니다."})
|
||||
latency_ms = (time() - start_time) * 1000
|
||||
try:
|
||||
await log_completion(job.id, "cancelled", len("".join(collected)), latency_ms, time())
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
collected.append(chunk)
|
||||
# --- Reasoning ---
|
||||
await state_stream.push(job.id, "processing", {"message": "Gemma 4가 응답을 생성하고 있습니다..."})
|
||||
|
||||
# Stream partial result
|
||||
await state_stream.push(job.id, "result", {"content": chunk})
|
||||
try:
|
||||
ok = await _stream_with_cancel(backend_registry.reasoner, rewritten_message, job, collected)
|
||||
if not ok:
|
||||
return
|
||||
except Exception:
|
||||
# Gemma streaming 중간 실패 → EXAONE fallback
|
||||
logger.warning("Reasoner failed for job %s, falling back to rewriter", job.id, exc_info=True)
|
||||
|
||||
# Heartbeat: 긴 침묵 방지
|
||||
now = asyncio.get_event_loop().time()
|
||||
if now - last_heartbeat >= HEARTBEAT_INTERVAL:
|
||||
await state_stream.push(job.id, "processing", {"message": "응답 생성 중..."})
|
||||
last_heartbeat = now
|
||||
if job.status == JobStatus.cancelled:
|
||||
return
|
||||
|
||||
await state_stream.push(job.id, "processing", {"message": "모델 전환 중..."})
|
||||
reasoning_model = rewrite_model # fallback 기록
|
||||
|
||||
ok = await _stream_with_cancel(backend_registry.rewriter, job.message, job, collected)
|
||||
if not ok:
|
||||
return
|
||||
|
||||
# --- Complete ---
|
||||
if not collected:
|
||||
job_manager.set_status(job.id, JobStatus.failed)
|
||||
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
|
||||
await state_stream.push(job.id, "error", {"message": "응답을 받지 못했습니다."})
|
||||
status = "failed"
|
||||
else:
|
||||
job_manager.set_status(job.id, JobStatus.completed)
|
||||
await state_stream.push(job.id, "done", {"message": "완료"})
|
||||
status = "completed"
|
||||
|
||||
# DB 로깅: 완료 기록
|
||||
# --- DB 로깅 ---
|
||||
latency_ms = (time() - start_time) * 1000
|
||||
response_text = "".join(collected)
|
||||
try:
|
||||
await log_completion(job.id, status, len(response_text), latency_ms, time())
|
||||
await log_completion(
|
||||
job.id, status, len(response_text), latency_ms, time(),
|
||||
rewrite_model=rewrite_model,
|
||||
reasoning_model=reasoning_model,
|
||||
rewritten_message=rewritten_message,
|
||||
rewrite_latency_ms=rewrite_latency if use_pipeline else 0,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to log completion for job %s", job.id, exc_info=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user