diff --git a/nanoclaude/services/backend_registry.py b/nanoclaude/services/backend_registry.py index ac196b7..b72fcfb 100644 --- a/nanoclaude/services/backend_registry.py +++ b/nanoclaude/services/backend_registry.py @@ -58,6 +58,7 @@ class BackendRegistry: system_prompt=REASONER_PROMPT, temperature=settings.reasoning_temperature, timeout=settings.reasoning_timeout, + max_tokens=16000, ) def start_health_loop(self, interval: float = 30.0) -> None: diff --git a/nanoclaude/services/model_adapter.py b/nanoclaude/services/model_adapter.py index 5b289c2..9690709 100644 --- a/nanoclaude/services/model_adapter.py +++ b/nanoclaude/services/model_adapter.py @@ -23,6 +23,7 @@ class ModelAdapter: system_prompt: str, temperature: float = 0.7, timeout: float = 120.0, + max_tokens: int = 4096, ): self.name = name self.base_url = base_url @@ -30,6 +31,7 @@ class ModelAdapter: self.system_prompt = system_prompt self.temperature = temperature self.timeout = timeout + self.max_tokens = max_tokens async def stream_chat(self, message: str, *, messages: list[dict] | None = None) -> AsyncGenerator[str, None]: """스트리밍 호출. content chunk를 yield. messages 직접 전달 가능.""" @@ -43,6 +45,7 @@ class ModelAdapter: "messages": messages, "stream": True, "temperature": self.temperature, + "max_tokens": self.max_tokens, } async with httpx.AsyncClient(timeout=self.timeout) as client: @@ -92,6 +95,7 @@ class ModelAdapter: "messages": messages, "stream": False, "temperature": self.temperature, + "max_tokens": self.max_tokens, } async with httpx.AsyncClient(timeout=self.timeout) as client: