Files
gpu-services/nanoclaude/services/backend_registry.py
Hyungi Ahn 9b8059ca38 fix: 시스템 프롬프트 튜닝 — 상냥하고 간결한 대화 스타일
- reasoner: "이드" 페르소나, 간결+상냥, 불필요한 구조화 금지
- rewriter: 인사/잡담은 그대로 통과

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 12:33:39 +09:00

97 lines
3.5 KiB
Python

"""BackendRegistry — 모델 어댑터 관리 + 헬스체크 루프."""
from __future__ import annotations
import asyncio
import logging
import time
from services.model_adapter import ModelAdapter
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# System prompt handed to the "rewriter" adapter in
# BackendRegistry.init_from_settings. In English, the text tells the model
# that it is a question-restructuring specialist: analyse the user's question,
# clarify the intent, and rewrite it as a structured question; output only the
# rewritten question, never an explanation or an answer; pass greetings and
# small talk through unchanged.
REWRITER_PROMPT = (
"너는 질문 재구성 전문가다. "
"사용자의 질문을 분석하여 의도를 명확히 하고, 구조화된 질문으로 재작성하라. "
"재구성된 질문만 출력하라. 부연 설명이나 답변은 절대 하지 마라. "
"인사나 잡담이면 그대로 출력하라."
)
# System prompt handed to the "reasoner" adapter in
# BackendRegistry.init_from_settings. In English, the text sets up the "이드"
# persona, a kind and friendly AI assistant: converse concisely and naturally,
# answer greetings with a short greeting, answer questions with just the key
# point, and avoid unnecessary structure (numbered lists, headers) in favour
# of a relaxed conversational tone.
REASONER_PROMPT = (
"너는 이드, 상냥하고 친근한 AI 어시스턴트야. "
"간결하고 자연스럽게 대화해. 인사에는 짧게 인사로 답하고, "
"질문에는 핵심만 명확하게 답해. "
"불필요한 구조화(번호 매기기, 헤더)는 피하고, 대화하듯 편하게 답변해."
)
class BackendRegistry:
    """Hold the rewriter and reasoner model adapters and track their health.

    The registry owns two optional adapter slots (``rewriter`` and
    ``reasoner``), the most recent health flag and probe latency for each
    role, and an optional background task that re-probes them periodically.
    """

    def __init__(self) -> None:
        """Create an empty registry: no adapters, every role unhealthy."""
        self.rewriter: ModelAdapter | None = None
        self.reasoner: ModelAdapter | None = None
        # Outcome of the last probe per role; False until a probe succeeds.
        self._health: dict[str, bool] = {"rewriter": False, "reasoner": False}
        # Elapsed time of the last probe per role, in milliseconds.
        self._latency: dict[str, float] = {"rewriter": 0.0, "reasoner": 0.0}
        # Handle of the background probe task, or None when no loop is active.
        self._health_task: asyncio.Task | None = None

    def init_from_settings(self, settings) -> None:
        """Build both adapters from *settings*.

        Reads ``exaone_base_url``, ``exaone_model``, ``exaone_temperature``
        and ``exaone_timeout`` for the rewriter, and the matching
        ``reasoning_*`` attributes for the reasoner. Any adapters that were
        already set are replaced.
        """
        self.rewriter = ModelAdapter(
            name="EXAONE",
            base_url=settings.exaone_base_url,
            model=settings.exaone_model,
            system_prompt=REWRITER_PROMPT,
            temperature=settings.exaone_temperature,
            timeout=settings.exaone_timeout,
        )
        self.reasoner = ModelAdapter(
            name="Gemma4",
            base_url=settings.reasoning_base_url,
            model=settings.reasoning_model,
            system_prompt=REASONER_PROMPT,
            temperature=settings.reasoning_temperature,
            timeout=settings.reasoning_timeout,
        )

    def _configured_adapters(self) -> list:
        """Return ``(role, adapter)`` pairs for every adapter that is set."""
        candidates = (("rewriter", self.rewriter), ("reasoner", self.reasoner))
        return [(role, adapter) for role, adapter in candidates if adapter]

    def start_health_loop(self, interval: float = 30.0) -> None:
        """Start probing all adapters every *interval* seconds.

        A loop started by an earlier call is cancelled first, so the registry
        never runs more than one probe task and never loses the handle of a
        task that is still running. Uses ``asyncio.create_task``, so it has
        to be called while an event loop is running.
        """
        self.stop_health_loop()
        self._health_task = asyncio.create_task(self._health_loop(interval))

    def stop_health_loop(self) -> None:
        """Cancel the background probe task, if any, and drop its handle."""
        task, self._health_task = self._health_task, None
        if task is not None and not task.done():
            task.cancel()

    async def _health_loop(self, interval: float) -> None:
        """Probe immediately, then again after every *interval* seconds."""
        while True:
            await self._check_all()
            await asyncio.sleep(interval)

    async def _check_all(self) -> None:
        """Probe each configured adapter and record health and latency.

        A probe that raises is treated as "unhealthy" instead of being allowed
        to propagate: an escaping exception would end the background task,
        leaving every health flag frozen at its last value and skipping the
        remaining adapters.
        """
        for role, adapter in self._configured_adapters():
            start = time.monotonic()
            try:
                healthy = await adapter.health_check()
            except asyncio.CancelledError:
                # Cancellation must always reach the task so the loop can stop.
                raise
            except Exception as exc:
                logger.warning(
                    "%s (%s) health check raised: %r", adapter.name, role, exc
                )
                healthy = False
            elapsed = round((time.monotonic() - start) * 1000, 1)

            prev = self._health[role]
            self._health[role] = healthy
            self._latency[role] = elapsed
            # Only state transitions are logged, not every probe.
            if prev != healthy:
                status = "UP" if healthy else "DOWN"
                logger.warning(
                    "%s (%s) → %s (%.0fms)", adapter.name, role, status, elapsed
                )

    def is_healthy(self, role: str) -> bool:
        """Return the last recorded health of *role*; unknown roles are False."""
        return self._health.get(role, False)

    def health_summary(self) -> dict:
        """Return name, model, health and latency for each configured adapter.

        Roles whose adapter is not set are omitted from the result.
        """
        return {
            role: {
                "name": adapter.name,
                "model": adapter.model,
                "healthy": self._health[role],
                "latency_ms": self._latency[role],
            }
            for role, adapter in self._configured_adapters()
        }
# Module-level registry instance. It starts with no adapters set; they are
# created when init_from_settings() is called on it.
backend_registry = BackendRegistry()