Files
tk-factory-services/ai-service/services/embedding_service.py
Hyungi Ahn 2f7e083db0 feat: AI 서비스 MLX 듀얼 백엔드 및 모델 최적화
- MLX(맥미니 27B) 우선 → Ollama(조립컴 9B) fallback 구조
- pydantic-settings 기반 config 전환
- health check에 MLX 상태 추가
- 텍스트 모델 qwen3:8b → qwen3.5:9b-q8_0 변경

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 23:17:50 +09:00

150 lines
4.7 KiB
Python

import logging
from typing import Optional

from db.metadata_store import metadata_store
from db.vector_store import vector_store
from services.db_client import get_all_issues, get_issue_by_id, get_issues_since
from services.ollama_client import ollama_client
def build_document_text(issue: dict) -> str:
    """Assemble one searchable text blob from an issue's free-text fields.

    Plain narrative fields are appended as-is; solution/comment/cause fields
    are prefixed with a Korean label. Missing or empty fields are skipped.
    """
    # (field key, optional label) pairs, in the order they appear in the text.
    field_specs = (
        ("description", None),
        ("final_description", None),
        ("detail_notes", None),
        ("solution", "해결"),
        ("management_comment", "의견"),
        ("cause_detail", "원인"),
    )
    segments = []
    for key, label in field_specs:
        value = issue.get(key)
        if not value:
            continue
        segments.append(value if label is None else f"{label}: {value}")
    return " ".join(segments)
def build_metadata(issue: dict) -> dict:
    """Build the flat metadata dict stored alongside an issue's embedding.

    ``issue_id`` keeps its original (non-stringified) value; optional fields
    are stringified and included only when present and not None.
    """
    optional_keys = (
        "category", "project_id", "review_status",
        "responsible_department", "location_info",
    )
    meta = {"issue_id": issue["id"]}
    meta.update(
        {key: str(issue[key]) for key in optional_keys if issue.get(key) is not None}
    )
    report_date = issue.get("report_date")
    if report_date:
        # Keep only the YYYY-MM-DD prefix of the timestamp.
        meta["report_date"] = str(report_date)[:10]
    meta["has_solution"] = "true" if issue.get("solution") else "false"
    return meta
BATCH_SIZE = 10
async def _sync_issues_batch(issues: list[dict]) -> tuple[int, int]:
"""배치 단위로 임베딩 생성 후 벡터 스토어에 저장"""
synced = 0
skipped = 0
# 유효한 이슈와 텍스트 준비
valid = []
for issue in issues:
doc_text = build_document_text(issue)
if not doc_text.strip():
skipped += 1
continue
valid.append((issue, doc_text))
# 배치 단위로 임베딩 생성
for i in range(0, len(valid), BATCH_SIZE):
batch = valid[i:i + BATCH_SIZE]
texts = [doc_text for _, doc_text in batch]
try:
embeddings = await ollama_client.batch_embeddings(texts)
for (issue, doc_text), embedding in zip(batch, embeddings):
vector_store.upsert(
doc_id=f"issue_{issue['id']}",
document=doc_text,
embedding=embedding,
metadata=build_metadata(issue),
)
synced += 1
except Exception:
skipped += len(batch)
return synced, skipped
async def sync_all_issues() -> dict:
    """Re-embed every issue in the database and record the sync watermark."""
    all_issues = get_all_issues()
    synced, skipped = await _sync_issues_batch(all_issues)
    if all_issues:
        # Remember the highest id seen so incremental sync can resume from it.
        metadata_store.set_last_synced_id(max(issue["id"] for issue in all_issues))
    return {"synced": synced, "skipped": skipped, "total": len(all_issues)}
async def sync_single_issue(issue_id: int) -> dict:
    """Embed one issue and upsert it into the vector store.

    Returns a status dict whose "status" is "not_found", "empty_text",
    or "synced" (the latter also carries the issue_id).
    """
    issue = get_issue_by_id(issue_id)
    if not issue:
        return {"status": "not_found"}

    document = build_document_text(issue)
    if not document.strip():
        return {"status": "empty_text"}

    vector = await ollama_client.generate_embedding(document)
    vector_store.upsert(
        doc_id=f"issue_{issue['id']}",
        document=document,
        embedding=vector,
        metadata=build_metadata(issue),
    )
    return {"status": "synced", "issue_id": issue_id}
async def sync_incremental() -> dict:
    """Embed only issues added since the last recorded sync watermark."""
    new_issues = get_issues_since(metadata_store.get_last_synced_id())
    synced, skipped = await _sync_issues_batch(new_issues)
    if new_issues:
        # Advance the watermark to the highest id we attempted this round.
        metadata_store.set_last_synced_id(max(issue["id"] for issue in new_issues))
    return {"synced": synced, "skipped": skipped, "new_issues": len(new_issues)}
async def search_similar_by_id(issue_id: int, n_results: int = 5) -> list[dict]:
    """Find issues whose embeddings are closest to the given issue's own text.

    Returns an empty list when the issue is missing or has no document text;
    the issue itself is excluded from the results.
    """
    issue = get_issue_by_id(issue_id)
    if not issue:
        return []
    document = build_document_text(issue)
    if not document.strip():
        return []
    vector = await ollama_client.generate_embedding(document)
    # Over-fetch by one so the source issue can be dropped from the hits.
    hits = vector_store.query(embedding=vector, n_results=n_results + 1)
    self_id = f"issue_{issue_id}"
    return [hit for hit in hits if hit["id"] != self_id][:n_results]
async def search_similar_by_text(
    query: str, n_results: int = 5, filters: Optional[dict] = None
) -> list[dict]:
    """Embed free text and query the vector store, optionally filtered.

    Args:
        query: free-text search query (embedded via the Ollama client).
        n_results: maximum number of hits to return.
        filters: optional metadata equality filters; entries with a None
            value are ignored. Values are stringified to match how
            build_metadata stores them.

    Returns:
        The vector-store hits for the query.
    """
    embedding = await ollama_client.generate_embedding(query)

    # Build the "where" clause: a single condition is passed bare,
    # multiple conditions are combined under "$and".
    where = None
    if filters:
        conditions = [{key: str(val)} for key, val in filters.items() if val is not None]
        if len(conditions) == 1:
            where = conditions[0]
        elif conditions:
            where = {"$and": conditions}

    return vector_store.query(
        embedding=embedding,
        n_results=n_results,
        where=where,
    )