diff --git a/app/ai/client.py b/app/ai/client.py index 45a7401..3ad9a72 100644 --- a/app/ai/client.py +++ b/app/ai/client.py @@ -262,14 +262,19 @@ class AIClient: data = response.json() return data["content"][0]["text"] else: + payload = { + "model": model_config.model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": model_config.max_tokens, + "chat_template_kwargs": {"enable_thinking": False}, + } + if model_config.temperature is not None: + payload["temperature"] = model_config.temperature + if model_config.top_p is not None: + payload["top_p"] = model_config.top_p response = await self._http.post( model_config.endpoint, - json={ - "model": model_config.model, - "messages": [{"role": "user", "content": prompt}], - "max_tokens": model_config.max_tokens, - "chat_template_kwargs": {"enable_thinking": False}, - }, + json=payload, timeout=model_config.timeout, ) response.raise_for_status() diff --git a/app/core/config.py b/app/core/config.py index d2f765c..e999900 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -26,6 +26,10 @@ class AIModelConfig(BaseModel): # B-0: 4B/26B 에 부여한 실사용 컨텍스트 상한 (char). triage=120k, primary=260k. # classify_worker 가 에스컬레이션 판정 시 참고. 0/None 이면 상한 무시. context_char_limit: int | None = None + # P1 of family-adaptive-bengio (2026-05-23): config-driven sampling profile. + # None = MLX/OpenAI server default. Anthropic branch 는 미적용 (별 plan 범위). + temperature: float | None = None + top_p: float | None = None class DeepSummaryBacklogConfig(BaseModel): diff --git a/config.yaml b/config.yaml index 690dd88..716a555 100644 --- a/config.yaml +++ b/config.yaml @@ -17,6 +17,7 @@ ai: max_tokens: 4096 timeout: 30 context_char_limit: 120000 + temperature: 0.0 # primary: 에스컬레이션 전용. 26B MLX (맥미니 Semaphore(1) 보호 대상). primary: @@ -25,6 +26,8 @@ ai: max_tokens: 8192 timeout: 180 context_char_limit: 260000 + temperature: 0.3 + top_p: 0.9 # fallback: primary 장애 시 최후 방어선. Claude Sonnet 4 API (소액 한도, 자동 trigger). # 호출 빈도 낮음 가정 (Mac mini 가 거의 항상 up) → premium 과 budget 공유 OK.