import logging
from typing import Optional

import chromadb

from config import settings

logger = logging.getLogger(__name__)


class VectorStore:
    """Thin wrapper around a persistent ChromaDB collection.

    The instance is created without touching the database; call
    ``initialize()`` once before using any other method, otherwise
    ``self.collection`` is still ``None``.
    """

    # Single source of truth for the collection name used by
    # ``initialize()`` and reported by ``stats()``.
    COLLECTION_NAME = "qc_issues"

    def __init__(self):
        self.client = None
        self.collection = None

    def initialize(self):
        """Open the persistent client and get or create the collection.

        The storage path is read from ``settings.CHROMA_PERSIST_DIR`` and the
        collection is configured for cosine distance.
        """
        self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR)
        self.collection = self.client.get_or_create_collection(
            name=self.COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"},
        )

    def upsert(
        self,
        doc_id: str,
        document: str,
        embedding: list[float],
        metadata: Optional[dict] = None,
    ):
        """Insert or update a single document.

        Args:
            doc_id: Identifier of the record.
            document: Raw text stored alongside the embedding.
            embedding: Embedding vector for ``document``.
            metadata: Optional metadata; a missing or empty mapping is sent
                as "no metadata" (``None``).
        """
        self.collection.upsert(
            ids=[doc_id],
            documents=[document],
            embeddings=[embedding],
            metadatas=[metadata] if metadata else None,
        )

    def query(
        self,
        embedding: list[float],
        n_results: int = 5,
        where: Optional[dict] = None,
    ) -> list[dict]:
        """Return the records nearest to ``embedding``.

        Args:
            embedding: Query vector.
            n_results: Maximum number of records to return.
            where: Optional metadata filter passed through to the collection;
                omitted from the call when empty.

        Returns:
            A list of dicts with keys ``id``, ``document``, ``distance``,
            ``metadata`` and ``similarity`` (``1 - distance``, rounded to four
            decimals). If the underlying query raises, the error is logged
            and an empty list is returned.
        """
        kwargs = {
            "query_embeddings": [embedding],
            "n_results": n_results,
            "include": ["documents", "metadatas", "distances"],
        }
        if where:
            kwargs["where"] = where

        try:
            results = self.collection.query(**kwargs)
        except Exception:
            # Best-effort lookup: callers get an empty result, but the failure
            # is recorded instead of disappearing silently.
            logger.exception("Vector store query failed; returning no results")
            return []

        return self._format_results(results)

    @staticmethod
    def _format_results(results) -> list[dict]:
        """Flatten the first result row of a query response into item dicts."""
        if not (results and results["ids"] and results["ids"][0]):
            return []

        documents = results["documents"]
        distances = results["distances"]
        metadatas = results["metadatas"]

        items = []
        for i, doc_id in enumerate(results["ids"][0]):
            distance = distances[0][i] if distances else 0
            items.append(
                {
                    "id": doc_id,
                    # Per-record values may be None (e.g. a record stored
                    # without metadata); normalise to the same defaults used
                    # when the whole field is absent.
                    "document": (documents[0][i] if documents else "") or "",
                    "distance": distance,
                    "metadata": (metadatas[0][i] if metadatas else None) or {},
                    # cosine distance -> similarity
                    "similarity": round(1 - distance, 4),
                }
            )
        return items

    def delete(self, doc_id: str):
        """Delete the record with the given id from the collection."""
        self.collection.delete(ids=[doc_id])

    def count(self) -> int:
        """Return the number of records in the collection."""
        return self.collection.count()

    def stats(self) -> dict:
        """Return the record count and the collection name."""
        return {
            "total_documents": self.count(),
            "collection_name": self.COLLECTION_NAME,
        }


vector_store = VectorStore()