fix: max_tokens 추가 — Gemma 16000, EXAONE 4096

응답이 중간에 끊기는 문제 해결. ModelAdapter에 max_tokens
파라미터 추가, stream/complete 양쪽 payload에 반영.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 12:52:31 +09:00
parent 74f8df48fc
commit a16ff2ea88
2 changed files with 5 additions and 0 deletions

View File

@@ -58,6 +58,7 @@ class BackendRegistry:
             system_prompt=REASONER_PROMPT,
             temperature=settings.reasoning_temperature,
             timeout=settings.reasoning_timeout,
+            max_tokens=16000,
         )

     def start_health_loop(self, interval: float = 30.0) -> None:

View File

@@ -23,6 +23,7 @@ class ModelAdapter:
         system_prompt: str,
         temperature: float = 0.7,
         timeout: float = 120.0,
+        max_tokens: int = 4096,
     ):
         self.name = name
         self.base_url = base_url
@@ -30,6 +31,7 @@ class ModelAdapter:
         self.system_prompt = system_prompt
         self.temperature = temperature
         self.timeout = timeout
+        self.max_tokens = max_tokens

     async def stream_chat(self, message: str, *, messages: list[dict] | None = None) -> AsyncGenerator[str, None]:
         """스트리밍 호출. content chunk를 yield. messages 직접 전달 가능."""
@@ -43,6 +45,7 @@ class ModelAdapter:
             "messages": messages,
             "stream": True,
             "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
         }

         async with httpx.AsyncClient(timeout=self.timeout) as client:
@@ -92,6 +95,7 @@ class ModelAdapter:
             "messages": messages,
             "stream": False,
             "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
         }

         async with httpx.AsyncClient(timeout=self.timeout) as client: