279 lines
11 KiB
Python
279 lines
11 KiB
Python
"""Tests for Knowledge Base API endpoints"""
|
|
import pytest
|
|
import uuid
|
|
from app.models import KnowledgeBase
|
|
|
|
|
|
class TestKnowledgeAPI:
    """Test cases for Knowledge Base endpoints.

    Every test receives a ``client`` fixture (defined outside this file) and
    calls the API through its ``get``/``post``/``put``/``delete`` methods.
    Setup steps go through the private helpers below so that a failing
    prerequisite request is reported as a failed status assertion rather than
    as a ``KeyError`` when the response body is read.
    """

    @staticmethod
    def _create_kb(client, name):
        """Create a knowledge base called *name* and return its ID.

        Asserts that the create request returned 200 before reading the ID.
        """
        response = client.post("/api/knowledge/bases", json={"name": name})
        assert response.status_code == 200
        return response.json()["id"]

    @staticmethod
    def _upload_document(client, kb_id, doc_data):
        """Upload *doc_data* to knowledge base *kb_id* and return the JSON body.

        Asserts that the upload request returned 200 before decoding the body.
        """
        response = client.post(
            f"/api/knowledge/bases/{kb_id}/documents",
            json=doc_data,
        )
        assert response.status_code == 200
        return response.json()

    def test_get_knowledge_bases_empty(self, client):
        """Test getting knowledge bases when database is empty.

        Only the shape of the listing response is checked here.
        """
        response = client.get("/api/knowledge/bases")
        assert response.status_code == 200
        data = response.json()
        assert "total" in data
        assert "list" in data

    def test_create_knowledge_base(self, client):
        """Test creating a new knowledge base with every field supplied."""
        payload = {
            "name": "Test Knowledge Base",
            "description": "A test knowledge base",
            "embeddingModel": "text-embedding-3-small",
            "chunkSize": 500,
            "chunkOverlap": 50,
        }
        response = client.post("/api/knowledge/bases", json=payload)
        assert response.status_code == 200
        data = response.json()
        assert data["name"] == "Test Knowledge Base"
        assert data["description"] == "A test knowledge base"
        assert data["embeddingModel"] == "text-embedding-3-small"
        assert "id" in data
        assert data["docCount"] == 0
        assert data["chunkCount"] == 0
        assert data["status"] == "active"

    def test_create_knowledge_base_minimal(self, client):
        """Test creating a knowledge base with minimal data (name only)."""
        response = client.post("/api/knowledge/bases", json={"name": "Minimal KB"})
        assert response.status_code == 200
        assert response.json()["name"] == "Minimal KB"

    def test_get_knowledge_base_by_id(self, client):
        """Test getting a specific knowledge base by ID."""
        kb_id = self._create_kb(client, "Test KB")

        response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["id"] == kb_id
        assert data["name"] == "Test KB"

    def test_get_knowledge_base_not_found(self, client):
        """Test getting a non-existent knowledge base returns 404."""
        response = client.get("/api/knowledge/bases/non-existent-id")
        assert response.status_code == 404

    def test_update_knowledge_base(self, client):
        """Test updating a knowledge base's name, description and chunk size."""
        kb_id = self._create_kb(client, "Original Name")

        update_data = {
            "name": "Updated Name",
            "description": "Updated description",
            "chunkSize": 800,
        }
        response = client.put(f"/api/knowledge/bases/{kb_id}", json=update_data)
        assert response.status_code == 200
        data = response.json()
        assert data["name"] == "Updated Name"
        assert data["description"] == "Updated description"
        assert data["chunkSize"] == 800

    def test_delete_knowledge_base(self, client):
        """Test deleting a knowledge base and that it is gone afterwards."""
        kb_id = self._create_kb(client, "To Delete")

        response = client.delete(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200

        # Verify deleted: a follow-up lookup must no longer find it.
        get_response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert get_response.status_code == 404

    def test_upload_document(self, client):
        """Test uploading a document to knowledge base."""
        kb_id = self._create_kb(client, "Test KB for Docs")

        doc_data = {
            "name": "test-document.txt",
            "size": "1024",
            "fileType": "txt",
            "storageUrl": "https://storage.example.com/test-document.txt",
        }
        # The helper asserts the 200 status before returning the body.
        data = self._upload_document(client, kb_id, doc_data)
        assert data["name"] == "test-document.txt"
        assert "id" in data
        assert data["status"] == "pending"

    def test_delete_document(self, client):
        """Test deleting a document from knowledge base."""
        kb_id = self._create_kb(client, "Test KB for Delete")

        doc_data = {"name": "to-delete.txt", "size": "100", "fileType": "txt"}
        doc_id = self._upload_document(client, kb_id, doc_data)["id"]

        response = client.delete(
            f"/api/knowledge/bases/{kb_id}/documents/{doc_id}"
        )
        assert response.status_code == 200

    def test_index_document(self, client):
        """Test indexing a document."""
        kb_id = self._create_kb(client, "Test KB for Index")

        index_data = {
            "document_id": "doc-001",
            "content": "This is the content to index. It contains important information about the product.",
        }
        response = client.post(
            f"/api/knowledge/bases/{kb_id}/documents/doc-001/index",
            json=index_data,
        )
        # This might return 200 or error depending on vector store implementation
        assert response.status_code in [200, 500]

    def test_search_knowledge(self, client):
        """Test searching knowledge base."""
        kb_id = self._create_kb(client, "Test KB for Search")

        # Search (this may fail without indexed content)
        search_data = {
            "query": "test query",
            "kb_id": kb_id,
            "nResults": 5,
        }
        response = client.post("/api/knowledge/search", json=search_data)
        # This might return 200 or error depending on implementation
        assert response.status_code in [200, 500]

    def test_get_knowledge_stats(self, client):
        """Test getting knowledge base statistics."""
        kb_id = self._create_kb(client, "Test KB for Stats")

        response = client.get(f"/api/knowledge/bases/{kb_id}/stats")
        assert response.status_code == 200
        data = response.json()
        assert data["kb_id"] == kb_id
        assert "docCount" in data
        assert "chunkCount" in data

    def test_knowledge_bases_pagination(self, client):
        """Test knowledge bases pagination."""
        # Create multiple KBs; each creation is checked by the helper.
        for i in range(5):
            self._create_kb(client, f"Knowledge Base {i}")

        response = client.get("/api/knowledge/bases?page=1&limit=3")
        assert response.status_code == 200
        data = response.json()
        # NOTE(review): the exact total assumes no knowledge bases exist
        # before this test runs - confirm the fixtures isolate the database.
        assert data["total"] == 5
        assert len(data["list"]) == 3

    def test_different_embedding_models(self, client):
        """Test creating KB with different embedding models."""
        models = [
            "text-embedding-3-small",
            "text-embedding-3-large",
            "bge-small-zh",
        ]
        for model in models:
            payload = {"name": f"KB with {model}", "embeddingModel": model}
            response = client.post("/api/knowledge/bases", json=payload)
            assert response.status_code == 200
            assert response.json()["embeddingModel"] == model

    def test_different_chunk_sizes(self, client):
        """Test creating KB with different chunk configurations."""
        configs = [
            {"chunkSize": 500, "chunkOverlap": 50},
            {"chunkSize": 1000, "chunkOverlap": 100},
            {"chunkSize": 256, "chunkOverlap": 25},
        ]
        for idx, config in enumerate(configs):
            payload = {"name": f"Chunk Test KB {idx}", **config}
            response = client.post("/api/knowledge/bases", json=payload)
            assert response.status_code == 200

    def test_knowledge_base_with_documents(self, client):
        """Test creating KB and adding multiple documents."""
        kb_id = self._create_kb(client, "KB with Multiple Docs")

        # Add multiple documents; each upload is checked by the helper.
        for i in range(3):
            doc_data = {
                "name": f"document-{i}.txt",
                "size": f"{1000 + i * 100}",
                "fileType": "txt",
            }
            self._upload_document(client, kb_id, doc_data)

        # Verify documents are listed on the knowledge base detail response.
        response = client.get(f"/api/knowledge/bases/{kb_id}")
        assert response.status_code == 200
        data = response.json()
        assert len(data["documents"]) == 3

    def test_create_knowledge_base_duplicate_name(self, client):
        """Test duplicate KB names are rejected for same user."""
        payload = {"name": "Duplicate KB"}
        first = client.post("/api/knowledge/bases", json=payload)
        assert first.status_code == 200

        second = client.post("/api/knowledge/bases", json=payload)
        assert second.status_code == 400

    def test_update_embedding_model_blocked_when_chunks_exist(self, client, db_session):
        """Test embedding model change is blocked after indexing chunks."""
        kb_id = self._create_kb(client, "KB Embedding Lock")

        # Set a non-zero chunk count directly on the stored row instead of
        # going through the indexing endpoint.
        kb = db_session.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
        assert kb is not None
        kb.chunk_count = 5
        db_session.commit()

        update_resp = client.put(
            f"/api/knowledge/bases/{kb_id}",
            json={"embeddingModel": "text-embedding-3-large"},
        )
        assert update_resp.status_code == 400
|