fix: LLM thinking 출력 대응 — max_tokens 증가 + JSON 추출 강화
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -111,22 +111,29 @@ def llm_generate(prompt: str, model: str = "mlx-community/Qwen3.5-35B-A3B-4bit",
     import requests

     messages = []
     if json_mode:
-        messages.append({"role": "system", "content": "IMPORTANT: Output ONLY valid JSON. No thinking process, no explanation, no markdown fences. Start your response with { and end with }."})
+        messages.append({"role": "system", "content": "You must respond ONLY with valid JSON. No thinking, no explanation, no markdown."})
-    messages.append({"role": "user", "content": prompt})
+    # Qwen3.5: /nothink 접미사로 thinking 출력 억제
+    messages.append({"role": "user", "content": prompt + " /nothink"})
     resp = requests.post(f"{host}/v1/chat/completions", json={
         "model": model,
         "messages": messages,
-        "temperature": 0.3,
-        "max_tokens": 1024,
-    }, timeout=120)
+        "temperature": 0.1 if json_mode else 0.3,
+        "max_tokens": 2048,
+    }, timeout=180)
     resp.raise_for_status()
     content = resp.json()["choices"][0]["message"]["content"]
-    # JSON 블록 추출 (```json ... ``` 감싸기 대응)
+    # thinking 블록 제거 (Qwen3.5 thinking 모델 대응)
+    if "<think>" in content and "</think>" in content:
+        content = content.split("</think>")[-1].strip()
+    # JSON 블록 추출
     if "```json" in content:
         content = content.split("```json")[1].split("```")[0].strip()
     elif "```" in content:
         content = content.split("```")[1].split("```")[0].strip()
     # { 로 시작하는 JSON 추출
     import re
     json_match = re.search(r'\{[\s\S]*\}', content)
     if json_match:
         content = json_match.group(0)
     return content
Reference in New Issue
Block a user