fix: LLM thinking 허용 + 마지막 유효 JSON 추출 방식으로 변경
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -109,32 +109,32 @@ def llm_generate(prompt: str, model: str = "mlx-community/Qwen3.5-35B-A3B-4bit",
|
|||||||
host: str = "http://localhost:8800", json_mode: bool = False) -> str:
|
host: str = "http://localhost:8800", json_mode: bool = False) -> str:
|
||||||
"""MLX 서버 API 호출 (OpenAI 호환)"""
|
"""MLX 서버 API 호출 (OpenAI 호환)"""
|
||||||
import requests
|
import requests
|
||||||
messages = []
|
messages = [{"role": "user", "content": prompt}]
|
||||||
if json_mode:
|
|
||||||
messages.append({"role": "system", "content": "IMPORTANT: Output ONLY valid JSON. No thinking process, no explanation, no markdown fences. Start your response with { and end with }."})
|
|
||||||
messages.append({"role": "user", "content": prompt})
|
|
||||||
resp = requests.post(f"{host}/v1/chat/completions", json={
|
resp = requests.post(f"{host}/v1/chat/completions", json={
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": 0.1 if json_mode else 0.3,
|
"temperature": 0.3,
|
||||||
"max_tokens": 2048,
|
"max_tokens": 4096,
|
||||||
}, timeout=180)
|
}, timeout=300)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
content = resp.json()["choices"][0]["message"]["content"]
|
content = resp.json()["choices"][0]["message"]["content"]
|
||||||
# thinking 블록 제거 (Qwen3.5 thinking 모델 대응)
|
if not json_mode:
|
||||||
if "<think>" in content and "</think>" in content:
|
return content
|
||||||
content = content.split("</think>")[-1].strip()
|
# JSON 모드: thinking 허용 → 마지막 유효 JSON 객체 추출
|
||||||
# JSON 블록 추출
|
|
||||||
if "```json" in content:
|
|
||||||
content = content.split("```json")[1].split("```")[0].strip()
|
|
||||||
elif "```" in content:
|
|
||||||
content = content.split("```")[1].split("```")[0].strip()
|
|
||||||
# { 로 시작하는 JSON 추출
|
|
||||||
import re
|
import re
|
||||||
|
import json as _json
|
||||||
|
# 배열이 포함된 JSON 객체 매칭
|
||||||
|
all_jsons = re.findall(r'\{[^{}]*(?:\[[^\]]*\])?[^{}]*\}', content)
|
||||||
|
for j in reversed(all_jsons):
|
||||||
|
try:
|
||||||
|
parsed = _json.loads(j)
|
||||||
|
if any(k in parsed for k in ("domain_db", "tags", "domain", "classification")):
|
||||||
|
return j
|
||||||
|
except _json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
# 폴백: 전체에서 가장 큰 JSON 추출
|
||||||
json_match = re.search(r'\{[\s\S]*\}', content)
|
json_match = re.search(r'\{[\s\S]*\}', content)
|
||||||
if json_match:
|
return json_match.group(0) if json_match else content
|
||||||
content = json_match.group(0)
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
# 하위호환 별칭
|
# 하위호환 별칭
|
||||||
|
|||||||
Reference in New Issue
Block a user