Update knowledge base layout

This commit is contained in:
Xin Wang
2026-02-09 07:27:54 +08:00
parent e643c7db17
commit 7206c313d2
2 changed files with 76 additions and 6 deletions

View File

@@ -87,10 +87,21 @@ def get_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
@router.post("/bases") @router.post("/bases")
def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Session = Depends(get_db)): def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Session = Depends(get_db)):
name = (data.name or "").strip()
if not name:
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
exists = db.query(KnowledgeBase).filter(
KnowledgeBase.user_id == user_id,
KnowledgeBase.name == name
).first()
if exists:
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {name}")
kb = KnowledgeBase( kb = KnowledgeBase(
id=str(uuid.uuid4())[:8], id=str(uuid.uuid4())[:8],
user_id=user_id, user_id=user_id,
name=data.name, name=name,
description=data.description, description=data.description,
embedding_model=data.embeddingModel, embedding_model=data.embeddingModel,
chunk_size=data.chunkSize, chunk_size=data.chunkSize,
@@ -101,8 +112,11 @@ def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Sessi
db.refresh(kb) db.refresh(kb)
try: try:
vector_store.create_collection(kb.id, data.embeddingModel) vector_store.create_collection(kb.id, data.embeddingModel)
except Exception: except Exception as exc:
pass # Keep DB and vector store consistent on create failure
db.delete(kb)
db.commit()
raise HTTPException(status_code=502, detail=f"Failed to create ChromaDB collection: {exc}") from exc
return kb_to_dict(kb) return kb_to_dict(kb)
@@ -117,8 +131,38 @@ def update_knowledge_base(kb_id: str, data: KnowledgeBaseUpdate, db: Session = D
"chunkSize": "chunk_size", "chunkSize": "chunk_size",
"chunkOverlap": "chunk_overlap", "chunkOverlap": "chunk_overlap",
} }
if "name" in update_data:
update_data["name"] = (update_data["name"] or "").strip()
if not update_data["name"]:
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
name_exists = db.query(KnowledgeBase).filter(
KnowledgeBase.user_id == kb.user_id,
KnowledgeBase.name == update_data["name"],
KnowledgeBase.id != kb.id
).first()
if name_exists:
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {update_data['name']}")
embedding_changed = "embeddingModel" in update_data and update_data["embeddingModel"] != kb.embedding_model
if embedding_changed and kb.chunk_count > 0:
raise HTTPException(
status_code=400,
detail="Cannot change embedding model when knowledge base has indexed chunks. Remove documents first."
)
for field, value in update_data.items(): for field, value in update_data.items():
setattr(kb, field_map.get(field, field), value) setattr(kb, field_map.get(field, field), value)
if embedding_changed:
try:
vector_store.delete_collection(kb_id)
except Exception:
pass
try:
vector_store.create_collection(kb_id, kb.embedding_model)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Failed to update ChromaDB collection: {exc}") from exc
kb.updated_at = datetime.utcnow() kb.updated_at = datetime.utcnow()
db.commit() db.commit()
db.refresh(kb) db.refresh(kb)
@@ -130,15 +174,18 @@ def delete_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first() kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
if not kb: if not kb:
raise HTTPException(status_code=404, detail="Knowledge base not found") raise HTTPException(status_code=404, detail="Knowledge base not found")
vector_deleted = True
try: try:
vector_store.delete_collection(kb_id) vector_store.delete_collection(kb_id)
except Exception: except Exception:
pass vector_deleted = False
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all() docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
for doc in docs: for doc in docs:
db.delete(doc) db.delete(doc)
db.delete(kb) db.delete(kb)
db.commit() db.commit()
if not vector_deleted:
return {"message": "Deleted successfully", "warning": "Knowledge base deleted but failed to remove ChromaDB collection"}
return {"message": "Deleted successfully"} return {"message": "Deleted successfully"}

View File

@@ -1,6 +1,7 @@
"""Tests for Knowledge Base API endpoints""" """Tests for Knowledge Base API endpoints"""
import pytest import pytest
import uuid import uuid
from app.models import KnowledgeBase
class TestKnowledgeAPI: class TestKnowledgeAPI:
@@ -224,8 +225,8 @@ class TestKnowledgeAPI:
{"chunkSize": 1000, "chunkOverlap": 100}, {"chunkSize": 1000, "chunkOverlap": 100},
{"chunkSize": 256, "chunkOverlap": 25} {"chunkSize": 256, "chunkOverlap": 25}
] ]
for config in configs: for idx, config in enumerate(configs):
data = {"name": "Chunk Test KB", **config} data = {"name": f"Chunk Test KB {idx}", **config}
response = client.post("/api/knowledge/bases", json=data) response = client.post("/api/knowledge/bases", json=data)
assert response.status_code == 200 assert response.status_code == 200
@@ -253,3 +254,25 @@ class TestKnowledgeAPI:
assert response.status_code == 200 assert response.status_code == 200
data = response.json() data = response.json()
assert len(data["documents"]) == 3 assert len(data["documents"]) == 3
def test_create_knowledge_base_duplicate_name(self, client):
    """Posting the same knowledge base name twice must fail the second time.

    Neither request carries a user id, so both are created under whatever
    user the endpoint defaults to. The first POST is expected to return 200
    and the identical repeat is expected to return 400.
    """
    endpoint = "/api/knowledge/bases"
    body = {"name": "Duplicate KB"}

    # Initial creation goes through.
    created = client.post(endpoint, json=body)
    assert created.status_code == 200

    # Re-submitting the very same payload is refused.
    rejected = client.post(endpoint, json=body)
    assert rejected.status_code == 400
def test_update_embedding_model_blocked_when_chunks_exist(self, client, db_session):
    """Changing the embedding model is rejected once chunk_count is non-zero.

    A knowledge base is created through the API, its ``chunk_count`` is then
    set to 5 directly through the database session, and a PUT that supplies
    a different ``embeddingModel`` is expected to return 400.
    """
    created = client.post("/api/knowledge/bases", json={"name": "KB Embedding Lock"})
    assert created.status_code == 200
    kb_id = created.json()["id"]

    # Simulate indexed content by writing chunk_count on the stored row.
    lookup = db_session.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id)
    record = lookup.first()
    record.chunk_count = 5
    db_session.commit()

    new_settings = {"embeddingModel": "text-embedding-3-large"}
    rejected = client.put(f"/api/knowledge/bases/{kb_id}", json=new_settings)
    assert rejected.status_code == 400