Update LLM library web interface

This commit is contained in:
Xin Wang
2026-02-08 23:55:40 +08:00
parent 6462c4f432
commit be68e335f1
6 changed files with 614 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ from ..db import get_db
from ..models import LLMModel
from ..schemas import (
LLMModelCreate, LLMModelUpdate, LLMModelOut,
LLMModelTestResponse
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
)
router = APIRouter(prefix="/llm", tags=["LLM Models"])
@@ -204,3 +204,66 @@ def chat_with_llm(
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview LLM output via an OpenAI-compatible /chat/completions endpoint.

    Args:
        id: Primary key of the ``LLMModel`` row to preview against.
        request: Preview parameters — the user message, an optional system
            prompt, and optional overrides (api_key, max_tokens, temperature).
        db: SQLAlchemy session (injected by FastAPI).

    Returns:
        LLMPreviewResponse: reply text, vendor-reported token usage, and
        round-trip latency in milliseconds.

    Raises:
        HTTPException: 404 if the model id is unknown; 400 for an empty
            message or missing api_key/base_url configuration; 502 when the
            upstream request fails or the vendor returns a non-200 response.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    # FIX: the original called .strip() on `request.api_key or model.api_key`,
    # which raises AttributeError (an opaque 500) when both are None/empty.
    # Validate explicitly and answer with a clear client error instead.
    api_key = ((request.api_key or model.api_key) or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail="API key is not configured for this model")

    # FIX: same crash for a model row whose base_url is unset — the original
    # `model.base_url.rstrip('/')` would raise AttributeError on None.
    base_url = (model.base_url or "").strip()
    if not base_url:
        raise HTTPException(status_code=400, detail="Base URL is not configured for this model")

    # Optional system prompt first, then the user turn (OpenAI message order).
    messages = []
    if request.system_prompt and request.system_prompt.strip():
        messages.append({"role": "system", "content": request.system_prompt.strip()})
    messages.append({"role": "user", "content": user_message})

    payload = {
        "model": model.model_name or "gpt-3.5-turbo",
        "messages": messages,
        "max_tokens": request.max_tokens or 512,
        # Per-request temperature wins; otherwise fall back to the stored
        # model default, then 0.7.
        "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
    }
    headers = {"Authorization": f"Bearer {api_key}"}

    start_time = time.time()
    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{base_url.rstrip('/')}/chat/completions",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        # Transport-level failure: DNS, refused connection, timeout, TLS, ...
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Prefer the vendor's structured error message when the body is JSON;
        # fall back to the raw response text.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    choices = result.get("choices", [])
    if choices:
        # OpenAI shape: choices[0].message.content; tolerate missing keys.
        reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )