Files
AI-VideoAssistant/api/tests/test_knowledge.py
2026-02-08 14:26:19 +08:00

256 lines
9.5 KiB
Python

"""Tests for Knowledge Base API endpoints"""
import pytest
import uuid
class TestKnowledgeAPI:
    """Test cases for Knowledge Base endpoints.

    Every test takes ``client``, a fixture defined outside this file
    (presumably an HTTP test client bound to the application -- confirm in
    conftest). Tests only rely on it exposing ``get``/``post``/``put``/
    ``delete`` and on responses exposing ``status_code`` and ``json()``.
    """

    def _create_kb(self, client, name):
        """Create a knowledge base called ``name`` and return its id.

        Asserts that the creation succeeded so that a broken setup step is
        reported as such, instead of as a ``KeyError`` on ``"id"`` later on.
        """
        response = client.post("/api/knowledge/bases", json={"name": name})
        assert response.status_code == 200
        return response.json()["id"]

    def _upload_document(self, client, kb_id, doc_data):
        """Attach ``doc_data`` to knowledge base ``kb_id``; return the JSON body.

        Used for setup only, so the upload is asserted to have succeeded.
        """
        response = client.post(
            f"/api/knowledge/bases/{kb_id}/documents",
            json=doc_data,
        )
        assert response.status_code == 200
        return response.json()

    def test_get_knowledge_bases_empty(self, client):
        """Test getting knowledge bases when database is empty"""
        response = client.get("/api/knowledge/bases")
        assert response.status_code == 200
        # Only the shape of the listing envelope is checked here, not its
        # contents.
        body = response.json()
        assert "total" in body
        assert "list" in body

    def test_create_knowledge_base(self, client):
        """Test creating a new knowledge base"""
        payload = {
            "name": "Test Knowledge Base",
            "description": "A test knowledge base",
            "embeddingModel": "text-embedding-3-small",
            "chunkSize": 500,
            "chunkOverlap": 50,
        }
        response = client.post("/api/knowledge/bases", json=payload)
        assert response.status_code == 200
        body = response.json()
        # Submitted fields are echoed back.
        assert body["name"] == "Test Knowledge Base"
        assert body["description"] == "A test knowledge base"
        assert body["embeddingModel"] == "text-embedding-3-small"
        # Fields the server fills in for a brand-new knowledge base.
        assert "id" in body
        assert body["docCount"] == 0
        assert body["chunkCount"] == 0
        assert body["status"] == "active"

    def test_create_knowledge_base_minimal(self, client):
        """Test creating a knowledge base with minimal data"""
        response = client.post("/api/knowledge/bases", json={"name": "Minimal KB"})
        assert response.status_code == 200
        assert response.json()["name"] == "Minimal KB"

    def test_get_knowledge_base_by_id(self, client):
        """Test getting a specific knowledge base by ID"""
        kb_id = self._create_kb(client, "Test KB")

        response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200
        body = response.json()
        assert body["id"] == kb_id
        assert body["name"] == "Test KB"

    def test_get_knowledge_base_not_found(self, client):
        """Test getting a non-existent knowledge base"""
        response = client.get("/api/knowledge/bases/non-existent-id")
        assert response.status_code == 404

    def test_update_knowledge_base(self, client):
        """Test updating a knowledge base"""
        kb_id = self._create_kb(client, "Original Name")

        update_data = {
            "name": "Updated Name",
            "description": "Updated description",
            "chunkSize": 800,
        }
        response = client.put(f"/api/knowledge/bases/{kb_id}", json=update_data)
        assert response.status_code == 200
        body = response.json()
        assert body["name"] == "Updated Name"
        assert body["description"] == "Updated description"
        assert body["chunkSize"] == 800

    def test_delete_knowledge_base(self, client):
        """Test deleting a knowledge base"""
        kb_id = self._create_kb(client, "To Delete")

        response = client.delete(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200

        # A deleted knowledge base must no longer be retrievable.
        get_response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert get_response.status_code == 404

    def test_upload_document(self, client):
        """Test uploading a document to knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Docs")

        doc_data = {
            "name": "test-document.txt",
            "size": "1024",
            "fileType": "txt",
            "storageUrl": "https://storage.example.com/test-document.txt",
        }
        # The upload itself is under test, so it is posted explicitly rather
        # than through the setup helper.
        response = client.post(
            f"/api/knowledge/bases/{kb_id}/documents",
            json=doc_data,
        )
        assert response.status_code == 200
        body = response.json()
        assert body["name"] == "test-document.txt"
        assert "id" in body
        assert body["status"] == "pending"

    def test_delete_document(self, client):
        """Test deleting a document from knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Delete")
        uploaded = self._upload_document(
            client,
            kb_id,
            {"name": "to-delete.txt", "size": "100", "fileType": "txt"},
        )
        doc_id = uploaded["id"]

        response = client.delete(
            f"/api/knowledge/bases/{kb_id}/documents/{doc_id}"
        )
        assert response.status_code == 200

    def test_index_document(self, client):
        """Test indexing a document"""
        kb_id = self._create_kb(client, "Test KB for Index")

        # Note: no document with this id is uploaded first; the id is simply
        # sent in both the URL and the payload.
        doc_id = "doc-001"
        index_data = {
            "document_id": doc_id,
            "content": "This is the content to index. It contains important information about the product.",
        }
        response = client.post(
            f"/api/knowledge/bases/{kb_id}/documents/{doc_id}/index",
            json=index_data,
        )
        # This might return 200 or error depending on vector store
        # implementation, so a 500 is deliberately tolerated.
        assert response.status_code in [200, 500]

    def test_search_knowledge(self, client):
        """Test searching knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Search")

        # Nothing has been indexed, so the search may legitimately fail.
        search_data = {
            "query": "test query",
            "kb_id": kb_id,
            "nResults": 5,
        }
        response = client.post("/api/knowledge/search", json=search_data)
        # This might return 200 or error depending on implementation, so a
        # 500 is deliberately tolerated.
        assert response.status_code in [200, 500]

    def test_get_knowledge_stats(self, client):
        """Test getting knowledge base statistics"""
        kb_id = self._create_kb(client, "Test KB for Stats")

        response = client.get(f"/api/knowledge/bases/{kb_id}/stats")
        assert response.status_code == 200
        body = response.json()
        assert body["kb_id"] == kb_id
        assert "docCount" in body
        assert "chunkCount" in body

    def test_knowledge_bases_pagination(self, client):
        """Test knowledge bases pagination"""
        for i in range(5):
            self._create_kb(client, f"Knowledge Base {i}")

        response = client.get("/api/knowledge/bases?page=1&limit=3")
        assert response.status_code == 200
        body = response.json()
        # NOTE(review): ``total == 5`` only holds if each test starts from an
        # empty store -- confirm the ``client`` fixture provides that.
        assert body["total"] == 5
        assert len(body["list"]) == 3

    def test_different_embedding_models(self, client):
        """Test creating KB with different embedding models"""
        models = [
            "text-embedding-3-small",
            "text-embedding-3-large",
            "bge-small-zh",
        ]
        for model in models:
            payload = {"name": f"KB with {model}", "embeddingModel": model}
            response = client.post("/api/knowledge/bases", json=payload)
            assert response.status_code == 200
            assert response.json()["embeddingModel"] == model

    def test_different_chunk_sizes(self, client):
        """Test creating KB with different chunk configurations"""
        configs = [
            {"chunkSize": 500, "chunkOverlap": 50},
            {"chunkSize": 1000, "chunkOverlap": 100},
            {"chunkSize": 256, "chunkOverlap": 25},
        ]
        for config in configs:
            payload = {"name": "Chunk Test KB", **config}
            response = client.post("/api/knowledge/bases", json=payload)
            assert response.status_code == 200

    def test_knowledge_base_with_documents(self, client):
        """Test creating KB and adding multiple documents"""
        kb_id = self._create_kb(client, "KB with Multiple Docs")

        for i in range(3):
            self._upload_document(
                client,
                kb_id,
                {
                    "name": f"document-{i}.txt",
                    "size": f"{1000 + i * 100}",
                    "fileType": "txt",
                },
            )

        # The knowledge base detail view is expected to list its documents.
        response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200
        body = response.json()
        assert len(body["documents"]) == 3