"""Tests for Knowledge Base API endpoints""" import pytest import uuid class TestKnowledgeAPI: """Test cases for Knowledge Base endpoints""" def test_get_knowledge_bases_empty(self, client): """Test getting knowledge bases when database is empty""" response = client.get("/api/knowledge/bases") assert response.status_code == 200 data = response.json() assert "total" in data assert "list" in data def test_create_knowledge_base(self, client): """Test creating a new knowledge base""" data = { "name": "Test Knowledge Base", "description": "A test knowledge base", "embeddingModel": "text-embedding-3-small", "chunkSize": 500, "chunkOverlap": 50 } response = client.post("/api/knowledge/bases", json=data) assert response.status_code == 200 data = response.json() assert data["name"] == "Test Knowledge Base" assert data["description"] == "A test knowledge base" assert data["embeddingModel"] == "text-embedding-3-small" assert "id" in data assert data["docCount"] == 0 assert data["chunkCount"] == 0 assert data["status"] == "active" def test_create_knowledge_base_minimal(self, client): """Test creating a knowledge base with minimal data""" data = {"name": "Minimal KB"} response = client.post("/api/knowledge/bases", json=data) assert response.status_code == 200 assert response.json()["name"] == "Minimal KB" def test_get_knowledge_base_by_id(self, client): """Test getting a specific knowledge base by ID""" # Create first create_data = {"name": "Test KB"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Get by ID response = client.get(f"/api/knowledge/bases/{kb_id}") assert response.status_code == 200 data = response.json() assert data["id"] == kb_id assert data["name"] == "Test KB" def test_get_knowledge_base_not_found(self, client): """Test getting a non-existent knowledge base""" response = client.get("/api/knowledge/bases/non-existent-id") assert response.status_code == 404 def test_update_knowledge_base(self, client): """Test updating a knowledge base""" # Create first create_data = {"name": "Original Name"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Update update_data = { "name": "Updated Name", "description": "Updated description", "chunkSize": 800 } response = client.put(f"/api/knowledge/bases/{kb_id}", json=update_data) assert response.status_code == 200 data = response.json() assert data["name"] == "Updated Name" assert data["description"] == "Updated description" assert data["chunkSize"] == 800 def test_delete_knowledge_base(self, client): """Test deleting a knowledge base""" # Create first create_data = {"name": "To Delete"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Delete response = client.delete(f"/api/knowledge/bases/{kb_id}") assert response.status_code == 200 # Verify deleted get_response = client.get(f"/api/knowledge/bases/{kb_id}") assert get_response.status_code == 404 def test_upload_document(self, client): """Test uploading a document to knowledge base""" # Create KB first create_data = {"name": "Test KB for Docs"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Upload document doc_data = { "name": "test-document.txt", "size": "1024", "fileType": "txt", "storageUrl": "https://storage.example.com/test-document.txt" } response = client.post( f"/api/knowledge/bases/{kb_id}/documents", json=doc_data ) assert response.status_code == 200 data = response.json() assert data["name"] == "test-document.txt" assert "id" in data assert data["status"] == "pending" def test_delete_document(self, client): """Test deleting a document from knowledge base""" # Create KB first create_data = {"name": "Test KB for Delete"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Upload document doc_data = {"name": "to-delete.txt", "size": "100", "fileType": "txt"} upload_response = client.post( f"/api/knowledge/bases/{kb_id}/documents", json=doc_data ) doc_id = upload_response.json()["id"] # Delete document response = client.delete( f"/api/knowledge/bases/{kb_id}/documents/{doc_id}" ) assert response.status_code == 200 def test_index_document(self, client): """Test indexing a document""" # Create KB first create_data = {"name": "Test KB for Index"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Index document index_data = { "document_id": "doc-001", "content": "This is the content to index. It contains important information about the product." } response = client.post( f"/api/knowledge/bases/{kb_id}/documents/doc-001/index", json=index_data ) # This might return 200 or error depending on vector store implementation assert response.status_code in [200, 500] def test_search_knowledge(self, client): """Test searching knowledge base""" # Create KB first create_data = {"name": "Test KB for Search"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Search (this may fail without indexed content) search_data = { "query": "test query", "kb_id": kb_id, "nResults": 5 } response = client.post("/api/knowledge/search", json=search_data) # This might return 200 or error depending on implementation assert response.status_code in [200, 500] def test_get_knowledge_stats(self, client): """Test getting knowledge base statistics""" # Create KB first create_data = {"name": "Test KB for Stats"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] response = client.get(f"/api/knowledge/bases/{kb_id}/stats") assert response.status_code == 200 data = response.json() assert data["kb_id"] == kb_id assert "docCount" in data assert "chunkCount" in data def test_knowledge_bases_pagination(self, client): """Test knowledge bases pagination""" # Create multiple KBs for i in range(5): data = {"name": f"Knowledge Base {i}"} client.post("/api/knowledge/bases", json=data) # Test pagination response = client.get("/api/knowledge/bases?page=1&limit=3") assert response.status_code == 200 data = response.json() assert data["total"] == 5 assert len(data["list"]) == 3 def test_different_embedding_models(self, client): """Test creating KB with different embedding models""" models = [ "text-embedding-3-small", "text-embedding-3-large", "bge-small-zh" ] for model in models: data = {"name": f"KB with {model}", "embeddingModel": model} response = client.post("/api/knowledge/bases", json=data) assert response.status_code == 200 assert response.json()["embeddingModel"] == model def test_different_chunk_sizes(self, client): """Test creating KB with different chunk configurations""" configs = [ {"chunkSize": 500, "chunkOverlap": 50}, {"chunkSize": 1000, "chunkOverlap": 100}, {"chunkSize": 256, "chunkOverlap": 25} ] for config in configs: data = {"name": "Chunk Test KB", **config} response = client.post("/api/knowledge/bases", json=data) assert response.status_code == 200 def test_knowledge_base_with_documents(self, client): """Test creating KB and adding multiple documents""" # Create KB create_data = {"name": "KB with Multiple Docs"} create_response = client.post("/api/knowledge/bases", json=create_data) kb_id = create_response.json()["id"] # Add multiple documents for i in range(3): doc_data = { "name": f"document-{i}.txt", "size": f"{1000 + i * 100}", "fileType": "txt" } client.post( f"/api/knowledge/bases/{kb_id}/documents", json=doc_data ) # Verify documents are listed response = client.get(f"/api/knowledge/bases/{kb_id}") assert response.status_code == 200 data = response.json() assert len(data["documents"]) == 3