From 7206c313d2fee7d834163d668b10969a4bc4c500 Mon Sep 17 00:00:00 2001
From: Xin Wang
Date: Mon, 9 Feb 2026 07:27:54 +0800
Subject: [PATCH] Update knowledge base layout

---
 api/app/routers/knowledge.py | 55 +++++++++++++++++++++++++++++++++---
 api/tests/test_knowledge.py  | 27 ++++++++++++++++--
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/api/app/routers/knowledge.py b/api/app/routers/knowledge.py
index e4fae3e..ee43f02 100644
--- a/api/app/routers/knowledge.py
+++ b/api/app/routers/knowledge.py
@@ -87,10 +87,21 @@ def get_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
 
 @router.post("/bases")
 def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Session = Depends(get_db)):
+    name = (data.name or "").strip()
+    if not name:
+        raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
+
+    exists = db.query(KnowledgeBase).filter(
+        KnowledgeBase.user_id == user_id,
+        KnowledgeBase.name == name
+    ).first()
+    if exists:
+        raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {name}")
+
     kb = KnowledgeBase(
         id=str(uuid.uuid4())[:8],
         user_id=user_id,
-        name=data.name,
+        name=name,
         description=data.description,
         embedding_model=data.embeddingModel,
         chunk_size=data.chunkSize,
@@ -101,8 +112,11 @@ def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Sessi
     db.refresh(kb)
     try:
         vector_store.create_collection(kb.id, data.embeddingModel)
-    except Exception:
-        pass
+    except Exception as exc:
+        # Keep DB and vector store consistent on create failure
+        db.delete(kb)
+        db.commit()
+        raise HTTPException(status_code=502, detail=f"Failed to create ChromaDB collection: {exc}") from exc
     return kb_to_dict(kb)
 
 
@@ -117,8 +131,38 @@ def update_knowledge_base(kb_id: str, data: KnowledgeBaseUpdate, db: Session = D
         "chunkSize": "chunk_size",
         "chunkOverlap": "chunk_overlap",
     }
+    if "name" in update_data:
+        update_data["name"] = (update_data["name"] or "").strip()
+        if not update_data["name"]:
+            raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
+        name_exists = db.query(KnowledgeBase).filter(
+            KnowledgeBase.user_id == kb.user_id,
+            KnowledgeBase.name == update_data["name"],
+            KnowledgeBase.id != kb.id
+        ).first()
+        if name_exists:
+            raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {update_data['name']}")
+
+    embedding_changed = "embeddingModel" in update_data and update_data["embeddingModel"] != kb.embedding_model
+    if embedding_changed and kb.chunk_count > 0:
+        raise HTTPException(
+            status_code=400,
+            detail="Cannot change embedding model when knowledge base has indexed chunks. Remove documents first."
+        )
+
     for field, value in update_data.items():
         setattr(kb, field_map.get(field, field), value)
+
+    if embedding_changed:
+        try:
+            vector_store.delete_collection(kb_id)
+        except Exception:
+            pass
+        try:
+            vector_store.create_collection(kb_id, kb.embedding_model)
+        except Exception as exc:
+            raise HTTPException(status_code=502, detail=f"Failed to update ChromaDB collection: {exc}") from exc
+
     kb.updated_at = datetime.utcnow()
     db.commit()
     db.refresh(kb)
@@ -130,15 +174,18 @@ def delete_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
     kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
     if not kb:
         raise HTTPException(status_code=404, detail="Knowledge base not found")
+    vector_deleted = True
     try:
         vector_store.delete_collection(kb_id)
     except Exception:
-        pass
+        vector_deleted = False
     docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
     for doc in docs:
         db.delete(doc)
     db.delete(kb)
     db.commit()
+    if not vector_deleted:
+        return {"message": "Deleted successfully", "warning": "Knowledge base deleted but failed to remove ChromaDB collection"}
     return {"message": "Deleted successfully"}
 
 
diff --git a/api/tests/test_knowledge.py b/api/tests/test_knowledge.py
index 3a4dbe6..82a6024 100644
--- a/api/tests/test_knowledge.py
+++ b/api/tests/test_knowledge.py
@@ -1,6 +1,7 @@
 """Tests for Knowledge Base API endpoints"""
 import pytest
 import uuid
+from app.models import KnowledgeBase
 
 
 class TestKnowledgeAPI:
@@ -224,8 +225,8 @@ class TestKnowledgeAPI:
             {"chunkSize": 1000, "chunkOverlap": 100},
             {"chunkSize": 256, "chunkOverlap": 25}
         ]
-        for config in configs:
-            data = {"name": "Chunk Test KB", **config}
+        for idx, config in enumerate(configs):
+            data = {"name": f"Chunk Test KB {idx}", **config}
             response = client.post("/api/knowledge/bases", json=data)
             assert response.status_code == 200
 
@@ -253,3 +254,25 @@ class TestKnowledgeAPI:
         assert response.status_code == 200
         data = response.json()
         assert len(data["documents"]) == 3
+
+    def test_create_knowledge_base_duplicate_name(self, client):
+        """Test duplicate KB names are rejected for same user."""
+        payload = {"name": "Duplicate KB"}
+        first = client.post("/api/knowledge/bases", json=payload)
+        assert first.status_code == 200
+
+        second = client.post("/api/knowledge/bases", json=payload)
+        assert second.status_code == 400
+
+    def test_update_embedding_model_blocked_when_chunks_exist(self, client, db_session):
+        """Test embedding model change is blocked after indexing chunks."""
+        create_resp = client.post("/api/knowledge/bases", json={"name": "KB Embedding Lock"})
+        assert create_resp.status_code == 200
+        kb_id = create_resp.json()["id"]
+
+        kb = db_session.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
+        kb.chunk_count = 5
+        db_session.commit()
+
+        update_resp = client.put(f"/api/knowledge/bases/{kb_id}", json={"embeddingModel": "text-embedding-3-large"})
+        assert update_resp.status_code == 400