import logging

from services.ollama_client import ollama_client
from db.vector_store import vector_store
from db.metadata_store import metadata_store
from services.db_client import get_all_issues, get_issue_by_id, get_issues_since

logger = logging.getLogger(__name__)


def build_document_text(issue: dict) -> str:
    """Concatenate an issue's free-text fields into a single embedding document.

    Missing or empty fields are skipped. The solution / comment / cause fields
    keep their original (Korean) label prefixes so documents stay consistent
    with what is already stored in the vector index.
    """
    parts = []
    if issue.get("description"):
        parts.append(issue["description"])
    if issue.get("final_description"):
        parts.append(issue["final_description"])
    if issue.get("detail_notes"):
        parts.append(issue["detail_notes"])
    if issue.get("solution"):
        parts.append(f"해결: {issue['solution']}")
    if issue.get("management_comment"):
        parts.append(f"의견: {issue['management_comment']}")
    if issue.get("cause_detail"):
        parts.append(f"원인: {issue['cause_detail']}")
    return " ".join(parts)


def build_metadata(issue: dict) -> dict:
    """Build the flat string-valued metadata dict stored alongside a vector.

    Values are stringified because the vector store's filterable metadata is
    matched against string conditions (see search_similar_by_text). Only the
    first 10 chars of report_date are kept (ISO date without time part).
    """
    meta = {"issue_id": issue["id"]}
    for key in [
        "category",
        "project_id",
        "review_status",
        "responsible_department",
        "location_info",
    ]:
        val = issue.get(key)
        if val is not None:
            meta[key] = str(val)
    rd = issue.get("report_date")
    if rd:
        # Keep date only, drop any time component (e.g. "2024-01-02T09:00" -> "2024-01-02").
        meta["report_date"] = str(rd)[:10]
    meta["has_solution"] = "true" if issue.get("solution") else "false"
    return meta


async def sync_all_issues() -> dict:
    """Embed and upsert every issue into the vector store.

    Issues with no usable text are skipped; per-issue embedding/upsert failures
    are logged and counted as skipped so one bad record cannot abort the run.
    Returns {"synced", "skipped", "total"} counts.
    """
    issues = get_all_issues()
    synced = 0
    skipped = 0
    for issue in issues:
        doc_text = build_document_text(issue)
        if not doc_text.strip():
            skipped += 1
            continue
        try:
            embedding = await ollama_client.generate_embedding(doc_text)
            vector_store.upsert(
                doc_id=f"issue_{issue['id']}",
                document=doc_text,
                embedding=embedding,
                metadata=build_metadata(issue),
            )
            synced += 1
        except Exception:
            # Best-effort: record the failure instead of swallowing it silently.
            logger.exception("Failed to sync issue %s", issue["id"])
            skipped += 1
    if issues:
        # NOTE: the watermark advances past failed issues too; they will not be
        # retried by sync_incremental (pre-existing behavior, kept as-is).
        max_id = max(i["id"] for i in issues)
        metadata_store.set_last_synced_id(max_id)
    return {"synced": synced, "skipped": skipped, "total": len(issues)}


async def sync_single_issue(issue_id: int) -> dict:
    """Embed and upsert one issue by id.

    Returns a status dict: "not_found" if the id does not exist,
    "empty_text" if the issue has no usable text, otherwise "synced".
    """
    issue = get_issue_by_id(issue_id)
    if not issue:
        return {"status": "not_found"}
    doc_text = build_document_text(issue)
    if not doc_text.strip():
        return {"status": "empty_text"}
    embedding = await ollama_client.generate_embedding(doc_text)
    vector_store.upsert(
        doc_id=f"issue_{issue['id']}",
        document=doc_text,
        embedding=embedding,
        metadata=build_metadata(issue),
    )
    return {"status": "synced", "issue_id": issue_id}


async def sync_incremental() -> dict:
    """Embed and upsert only issues newer than the last-synced watermark.

    Per-issue failures are logged and skipped (best-effort, same policy as
    sync_all_issues). Returns {"synced", "new_issues"} counts.
    """
    last_id = metadata_store.get_last_synced_id()
    issues = get_issues_since(last_id)
    synced = 0
    for issue in issues:
        doc_text = build_document_text(issue)
        if not doc_text.strip():
            continue
        try:
            embedding = await ollama_client.generate_embedding(doc_text)
            vector_store.upsert(
                doc_id=f"issue_{issue['id']}",
                document=doc_text,
                embedding=embedding,
                metadata=build_metadata(issue),
            )
            synced += 1
        except Exception:
            # Was a silent `pass`; keep best-effort semantics but log the error.
            logger.exception("Incremental sync failed for issue %s", issue["id"])
    if issues:
        max_id = max(i["id"] for i in issues)
        metadata_store.set_last_synced_id(max_id)
    return {"synced": synced, "new_issues": len(issues)}


async def search_similar_by_id(issue_id: int, n_results: int = 5) -> list[dict]:
    """Find up to n_results issues most similar to the given issue.

    Embeds the issue's own document text and queries the vector store,
    excluding the issue itself from the results. Returns [] if the issue
    does not exist or has no usable text.
    """
    issue = get_issue_by_id(issue_id)
    if not issue:
        return []
    doc_text = build_document_text(issue)
    if not doc_text.strip():
        return []
    embedding = await ollama_client.generate_embedding(doc_text)
    # Over-fetch by one so that dropping the issue itself still leaves
    # n_results candidates.
    results = vector_store.query(
        embedding=embedding,
        n_results=n_results + 1,
    )
    self_id = f"issue_{issue_id}"
    filtered = [r for r in results if r["id"] != self_id]
    return filtered[:n_results]


async def search_similar_by_text(
    query: str, n_results: int = 5, filters: dict | None = None
) -> list[dict]:
    """Search the vector store with a free-text query and optional filters.

    Non-None filter values are stringified (metadata is stored as strings by
    build_metadata) and combined with "$and" when more than one is given.
    """
    embedding = await ollama_client.generate_embedding(query)
    where = None
    if filters:
        conditions = [{k: str(v)} for k, v in filters.items() if v is not None]
        if len(conditions) == 1:
            where = conditions[0]
        elif len(conditions) > 1:
            where = {"$and": conditions}
    return vector_store.query(
        embedding=embedding,
        n_results=n_results,
        where=where,
    )