Add kb index status

This commit is contained in:
Xin Wang
2026-02-10 10:34:25 +08:00
parent 94a562a1d5
commit 323ef61573
5 changed files with 30 additions and 5 deletions

View File

@@ -64,6 +64,8 @@ class VectorStore:
):
"""添加文档片段到向量库"""
collection = self.get_collection(kb_id)
if collection is None:
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
if ids is None:
ids = [f"chunk-{i}" for i in range(len(documents))]
@@ -93,6 +95,11 @@ class VectorStore:
) -> Dict:
"""检索相似文档"""
collection = self.get_collection(kb_id)
if collection is None:
raise ValueError(
f"Knowledge collection not found for kb_id={kb_id}. "
"Please ensure the knowledge base exists and documents are indexed."
)
# 生成查询向量
query_embedding = embedding_service.embed_query(query)
@@ -108,6 +115,8 @@ class VectorStore:
def get_stats(self, kb_id: str) -> Dict:
"""获取向量库统计"""
collection = self.get_collection(kb_id)
if collection is None:
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
return {
"count": collection.count(),
"kb_id": kb_id
@@ -116,11 +125,15 @@ class VectorStore:
def delete_documents(self, kb_id: str, ids: List[str]):
    """Delete the given chunks from the collection of a knowledge base.

    Args:
        kb_id: Identifier passed to ``get_collection`` to look up the collection.
        ids: Chunk ids forwarded to the collection's ``delete`` call.

    When ``get_collection`` returns ``None`` the call is a silent no-op.
    """
    target = self.get_collection(kb_id)
    # Best-effort delete: only touch the collection when one was found.
    if target is not None:
        target.delete(ids=ids)
def delete_by_metadata(self, kb_id: str, document_id: str):
    """Delete every chunk whose ``document_id`` metadata matches.

    Args:
        kb_id: Identifier passed to ``get_collection`` to look up the collection.
        document_id: Value matched against the ``document_id`` metadata field.

    The call is a silent no-op when ``get_collection`` returns ``None`` or
    when the metadata lookup yields no chunk ids.
    """
    store = self.get_collection(kb_id)
    if store is None:
        return
    # Resolve the matching chunk ids first, then delete them in one call.
    matched_ids = store.get(where={"document_id": document_id})["ids"]
    if not matched_ids:
        return
    store.delete(ids=matched_ids)
@@ -244,9 +257,6 @@ embedding_service = EmbeddingService()
def search_knowledge(kb_id: str, query: str, n_results: int = 5) -> Dict:
"""知识库检索"""
# 生成查询向量
query_vector = embedding_service.embed_query(query)
# 检索
results = vector_store.search(
kb_id=kb_id,