feat: language-aware routing (English -> llama3:8b-instruct; else Qwen 7B/14B). Docs updated
This commit is contained in:
@@ -9,6 +9,7 @@ class Settings:
|
||||
ollama_host: str = os.getenv("OLLAMA_HOST", "http://localhost:11434")
|
||||
base_model: str = os.getenv("BASE_MODEL", "qwen2.5:7b-instruct")
|
||||
boost_model: str = os.getenv("BOOST_MODEL", "qwen2.5:14b-instruct")
|
||||
english_model: str = os.getenv("ENGLISH_MODEL", "llama3:8b-instruct")
|
||||
embedding_model: str = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
|
||||
index_path: str = os.getenv("INDEX_PATH", "data/index.jsonl")
|
||||
|
||||
|
||||
@@ -84,9 +84,16 @@ def search(req: SearchRequest) -> Dict[str, Any]:
|
||||
def chat(req: ChatRequest) -> Dict[str, Any]:
|
||||
model = req.model
|
||||
if not model:
|
||||
# Routing: simple branch based on total message length / force_boost
|
||||
total_chars = sum(len(m.get("content", "")) for m in req.messages)
|
||||
model = settings.boost_model if (req.force_boost or total_chars > 2000) else settings.base_model
|
||||
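# Language detection (very simple): if the share of English (ASCII) letters is high, use the English model; otherwise fall back to the base/boost model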
|
||||
user_text = "\n".join(m.get("content", "") for m in req.messages if m.get("role") == "user")
|
||||
ascii_letters = sum(ch.isascii() and ch.isalpha() for ch in user_text)
|
||||
non_ascii_letters = sum((not ch.isascii()) and ch.isalpha() for ch in user_text)
|
||||
english_ratio = ascii_letters / max(ascii_letters + non_ascii_letters, 1)
|
||||
total_chars = len(user_text)
|
||||
if english_ratio > 0.8:
|
||||
model = settings.english_model
|
||||
else:
|
||||
model = settings.boost_model if (req.force_boost or total_chars > 2000) else settings.base_model
|
||||
|
||||
context_docs: List[str] = []
|
||||
if req.use_rag and index.rows:
|
||||
|
||||
Reference in New Issue
Block a user