# Configuration module for the nanoclaude two-stage pipeline server.
#
# Feature summary (from the introducing change):
# - ModelAdapter: generic OpenAI-compatible adapter (stream/complete/health)
# - BackendRegistry: health-check loop for rewriter (EXAONE) + reasoner (Gemma4)
# - Two-stage pipeline: EXAONE rewrite -> Gemma reasoning (SSE rewrite events exposed)
# - Fallback: EXAONE-only mode when the Mac mini is down; auto-switch on mid-stream failure
# - Cancel-safe: checked before/after rewrite, inside the streaming loop, and on fallback paths
# - Rewrite heartbeat: "processing" events every 2 seconds while complete_chat is pending
# - JobQueue: Semaphore(3)-based concurrency limit with accurate queue position
# - Endpoints: GET /chat/{job_id}/status, GET /queue/stats
# - DB: adds rewrite_model, reasoning_model, rewritten_message columns
#
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Runtime configuration for the EXAONE -> Gemma pipeline server.

    Values are read from environment variables (optionally via a ``.env``
    file); unknown environment variables are ignored per ``model_config``.
    """

    # EXAONE (rewriter) via Ollama
    exaone_base_url: str = "http://localhost:11434"
    exaone_model: str = "exaone3.5:7.8b-instruct-q8_0"
    exaone_temperature: float = 0.7
    exaone_timeout: float = 30.0  # rewrite step must stay short

    # Gemma 4 (reasoner) via MLX on Mac mini
    reasoning_base_url: str = "http://192.168.1.122:8800"
    reasoning_model: str = "mlx-community/gemma-4-26b-a4b-it-8bit"
    reasoning_temperature: float = 0.7
    reasoning_timeout: float = 180.0

    # Pipeline
    pipeline_enabled: bool = True  # False = EXAONE-only mode (Phase 1 fallback)

    # Queue
    max_concurrent_jobs: int = 3

    # Health check
    health_check_interval: float = 30.0

    # Server
    host: str = "0.0.0.0"
    port: int = 8100

    # DB
    db_path: str = "/app/data/nanoclaude.db"

    # Optional API key (empty = disabled)
    api_key: str = ""

    # pydantic-settings v2 config: load ".env", ignore unknown env vars.
    model_config = {"env_file": ".env", "extra": "ignore"}
# Module-level singleton imported by the rest of the application.
settings = Settings()