Update LLM library web interface

This commit is contained in:
Xin Wang
2026-02-08 23:55:40 +08:00
parent 6462c4f432
commit be68e335f1
6 changed files with 614 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ from ..db import get_db
from ..models import LLMModel
from ..schemas import (
LLMModelCreate, LLMModelUpdate, LLMModelOut,
LLMModelTestResponse
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
)
router = APIRouter(prefix="/llm", tags=["LLM Models"])
@@ -204,3 +204,66 @@ def chat_with_llm(
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview LLM output via an OpenAI-compatible /chat/completions call.

    Looks up the stored model by id, sends the optional system prompt plus
    the user message to the vendor endpoint, and returns the first choice's
    content together with usage stats and round-trip latency.

    Raises:
        HTTPException 404: model id not found.
        HTTPException 400: empty preview message, or no API key / base_url
            available for the model.
        HTTPException 502: network failure or non-200 vendor response.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    # Bug fix: the original called .strip() on (request.api_key or model.api_key),
    # which raises AttributeError (-> opaque 500) when both are None/empty.
    # Fail fast with a clear 400 instead.
    api_key = ((request.api_key or model.api_key) or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail="No API key configured for this model")

    # Same hardening for base_url: None would crash on .rstrip() below.
    if not model.base_url:
        raise HTTPException(status_code=400, detail="No base_url configured for this model")

    messages = []
    if request.system_prompt and request.system_prompt.strip():
        messages.append({"role": "system", "content": request.system_prompt.strip()})
    messages.append({"role": "user", "content": user_message})

    payload = {
        "model": model.model_name or "gpt-3.5-turbo",
        "messages": messages,
        "max_tokens": request.max_tokens or 512,
        # Per-request override wins; fall back to the stored temperature, then 0.7.
        "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
    }
    headers = {"Authorization": f"Bearer {api_key}"}

    start_time = time.time()
    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url.rstrip('/')}/chat/completions",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        # Network/transport failure (DNS, timeout, refused) -> upstream error.
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Best-effort extraction of a human-readable vendor error message;
        # fall back to the raw body if the payload isn't the expected shape.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    choices = result.get("choices", [])
    if choices:
        reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )

View File

@@ -153,6 +153,22 @@ class LLMModelTestResponse(BaseModel):
message: Optional[str] = None
class LLMPreviewRequest(BaseModel):
    """Request body for POST /llm/{id}/preview."""
    # The user message to send to the model; must be non-blank (router returns 400 otherwise).
    message: str
    # Optional system prompt, prepended as a "system" role message when non-blank.
    system_prompt: Optional[str] = None
    # Completion token cap; router defaults to 512 when omitted.
    max_tokens: Optional[int] = None
    # Sampling temperature override; router falls back to the stored model value, then 0.7.
    temperature: Optional[float] = None
    # Per-request API key override; router falls back to the stored model key.
    api_key: Optional[str] = None
class LLMPreviewResponse(BaseModel):
    """Response body for POST /llm/{id}/preview."""
    # True when the vendor returned non-empty reply content.
    success: bool
    # First choice's message content ("" / None when absent).
    reply: Optional[str] = None
    # Vendor-reported token usage dict, passed through verbatim.
    usage: Optional[dict] = None
    # Wall-clock round-trip time of the vendor call, in milliseconds.
    latency_ms: Optional[int] = None
    # Human-readable error when no content came back; None on success.
    error: Optional[str] = None
# ============ ASR Model ============
class ASRModelBase(BaseModel):
name: str

View File

@@ -244,3 +244,55 @@ class TestLLMModelAPI:
response = client.post("/api/llm", json=data)
assert response.status_code == 200
assert response.json()["type"] == "embedding"
def test_preview_llm_model_success(self, client, sample_llm_model_data, monkeypatch):
    """Preview endpoint should surface the vendor reply without real HTTP."""
    from app.routers import llm as llm_router

    created = client.post("/api/llm", json=sample_llm_model_data)
    model_id = created.json()["id"]

    class FakeResponse:
        status_code = 200

        @property
        def text(self):
            return '{"ok":true}'

        def json(self):
            return {
                "choices": [{"message": {"content": "Preview OK"}}],
                "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12}
            }

    class FakeClient:
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, json=None, headers=None):
            # Verify the router targets the right path with the stored credentials.
            assert url.endswith("/chat/completions")
            assert headers["Authorization"] == f"Bearer {sample_llm_model_data['api_key']}"
            assert json["messages"][0]["role"] == "user"
            return FakeResponse()

    monkeypatch.setattr(llm_router.httpx, "Client", FakeClient)

    resp = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello"})
    assert resp.status_code == 200
    body = resp.json()
    assert body["success"] is True
    assert body["reply"] == "Preview OK"
def test_preview_llm_model_reject_empty_message(self, client, sample_llm_model_data):
    """A whitespace-only preview message must be rejected with HTTP 400."""
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]
    resp = client.post(f"/api/llm/{model_id}/preview", json={"message": " "})
    assert resp.status_code == 400