tk-factory-services/ai-service/services/embedding_service.py
Hyungi Ahn b3012b8320 feat: Add AI service and a dedicated AI assistant page
- ai-service: Ollama-based AI service (classification, semantic search, RAG Q&A, pattern analysis)
- AI assistant page: chat-style Q&A, semantic search, pattern analysis, and classification testing
- Register the ai_assistant page in the permission system (disabled by default)
- Integrate AI features into existing pages (dashboard, inbox, management box)
- Update docker-compose, gateway, and nginx configuration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 09:38:30 +09:00

145 lines · 4.5 KiB · Python

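"""Embedding service for the ai-service.

Builds document text and metadata for each issue, generates embeddings through
the Ollama client, and keeps the vector store in sync (full, single-issue, and
incremental sync). Also provides semantic similarity search by issue id or by
free-text query, with optional metadata filters.
"""
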
from services.ollama_client import ollama_client
from db.vector_store import vector_store
from db.metadata_store import metadata_store
from services.db_client import get_all_issues, get_issue_by_id, get_issues_since


def build_document_text(issue: dict) -> str:
    """Concatenate an issue's free-text fields into one document for embedding."""
    parts = []
    if issue.get("description"):
        parts.append(issue["description"])
    if issue.get("final_description"):
        parts.append(issue["final_description"])
    if issue.get("detail_notes"):
        parts.append(issue["detail_notes"])
    # Korean prefixes label the structured fields (해결 = solution, 의견 = comment, 원인 = cause).
    if issue.get("solution"):
        parts.append(f"해결: {issue['solution']}")
    if issue.get("management_comment"):
        parts.append(f"의견: {issue['management_comment']}")
    if issue.get("cause_detail"):
        parts.append(f"원인: {issue['cause_detail']}")
    return " ".join(parts)


def build_metadata(issue: dict) -> dict:
    """Build the metadata dict stored alongside each embedding in the vector store."""
    meta = {"issue_id": issue["id"]}
    for key in [
        "category", "project_id", "review_status",
        "responsible_department", "location_info",
    ]:
        val = issue.get(key)
        if val is not None:
            meta[key] = str(val)
    rd = issue.get("report_date")
    if rd:
        meta["report_date"] = str(rd)[:10]  # keep only the YYYY-MM-DD part
    meta["has_solution"] = "true" if issue.get("solution") else "false"
    return meta
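# Illustrative shape of the metadata stored with each embedding (values are
# placeholders; optional fields appear only when present on the issue row):
#   {"issue_id": 42, "category": "...", "project_id": "...",
#    "report_date": "2026-03-01", "has_solution": "true"}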


async def sync_all_issues() -> dict:
    """Re-embed every issue and upsert it into the vector store."""
    issues = get_all_issues()
    synced = 0
    skipped = 0
    for issue in issues:
        doc_text = build_document_text(issue)
        if not doc_text.strip():
            skipped += 1
            continue
        try:
            embedding = await ollama_client.generate_embedding(doc_text)
            vector_store.upsert(
                doc_id=f"issue_{issue['id']}",
                document=doc_text,
                embedding=embedding,
                metadata=build_metadata(issue),
            )
            synced += 1
        except Exception:
            # Count embedding/upsert failures as skipped instead of aborting the sync.
            skipped += 1
    if issues:
        max_id = max(i["id"] for i in issues)
        metadata_store.set_last_synced_id(max_id)
    return {"synced": synced, "skipped": skipped, "total": len(issues)}


async def sync_single_issue(issue_id: int) -> dict:
    """Embed and upsert a single issue by id."""
    issue = get_issue_by_id(issue_id)
    if not issue:
        return {"status": "not_found"}
    doc_text = build_document_text(issue)
    if not doc_text.strip():
        return {"status": "empty_text"}
    embedding = await ollama_client.generate_embedding(doc_text)
    vector_store.upsert(
        doc_id=f"issue_{issue['id']}",
        document=doc_text,
        embedding=embedding,
        metadata=build_metadata(issue),
    )
    return {"status": "synced", "issue_id": issue_id}


async def sync_incremental() -> dict:
    """Embed only the issues added since the last recorded sync point."""
    last_id = metadata_store.get_last_synced_id()
    issues = get_issues_since(last_id)
    synced = 0
    for issue in issues:
        doc_text = build_document_text(issue)
        if not doc_text.strip():
            continue
        try:
            embedding = await ollama_client.generate_embedding(doc_text)
            vector_store.upsert(
                doc_id=f"issue_{issue['id']}",
                document=doc_text,
                embedding=embedding,
                metadata=build_metadata(issue),
            )
            synced += 1
        except Exception:
            # Skip issues that fail to embed rather than aborting the incremental run.
            pass
    if issues:
        max_id = max(i["id"] for i in issues)
        metadata_store.set_last_synced_id(max_id)
    return {"synced": synced, "new_issues": len(issues)}


async def search_similar_by_id(issue_id: int, n_results: int = 5) -> list[dict]:
    """Find issues similar to a given issue by embedding its own document text."""
    issue = get_issue_by_id(issue_id)
    if not issue:
        return []
    doc_text = build_document_text(issue)
    if not doc_text.strip():
        return []
    embedding = await ollama_client.generate_embedding(doc_text)
    # Ask for one extra result because the issue itself is likely to be the top hit.
    results = vector_store.query(
        embedding=embedding,
        n_results=n_results + 1,
    )
    # Exclude the query issue from its own results.
    filtered = [r for r in results if r["id"] != f"issue_{issue_id}"]
    return filtered[:n_results]


async def search_similar_by_text(query: str, n_results: int = 5, filters: dict | None = None) -> list[dict]:
    """Semantic search over issue documents, with optional metadata filters."""
    embedding = await ollama_client.generate_embedding(query)
    where = None
    if filters:
        # Build a Chroma-style where clause: a single condition is passed as-is,
        # multiple conditions are combined with $and.
        conditions = [{k: str(v)} for k, v in filters.items() if v is not None]
        if len(conditions) == 1:
            where = conditions[0]
        elif len(conditions) > 1:
            where = {"$and": conditions}
    return vector_store.query(
        embedding=embedding,
        n_results=n_results,
        where=where,
    )
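
Below is a minimal usage sketch, not part of the file above: it assumes the module is importable as services.embedding_service (consistent with the imports at the top of the file) and is run from a standalone async entrypoint. The query string and filter value are placeholders, and the printed result fields are inferred from how search_similar_by_id reads r["id"].

import asyncio

from services.embedding_service import search_similar_by_text, sync_all_issues


async def main() -> None:
    # Full re-sync: embed every issue and upsert it into the vector store.
    stats = await sync_all_issues()
    print(stats)  # {"synced": ..., "skipped": ..., "total": ...}

    # Semantic search with an optional metadata filter (placeholder filter value).
    hits = await search_similar_by_text(
        "overheating motor cause",
        n_results=5,
        filters={"category": "some-category"},
    )
    for hit in hits:
        print(hit["id"], hit.get("metadata"))


if __name__ == "__main__":
    asyncio.run(main())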