Update LLM library web interface

This commit is contained in:
Xin Wang
2026-02-08 23:55:40 +08:00
parent 6462c4f432
commit be68e335f1
6 changed files with 614 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ from ..db import get_db
from ..models import LLMModel
from ..schemas import (
LLMModelCreate, LLMModelUpdate, LLMModelOut,
LLMModelTestResponse
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
)
router = APIRouter(prefix="/llm", tags=["LLM Models"])
@@ -204,3 +204,66 @@ def chat_with_llm(
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview LLM output via an OpenAI-compatible /chat/completions endpoint.

    Args:
        id: Primary key of the ``LLMModel`` row to preview against.
        request: Preview parameters — the user message, an optional system
            prompt, and optional overrides (api_key, max_tokens, temperature).
        db: SQLAlchemy session (injected by FastAPI).

    Returns:
        LLMPreviewResponse: reply text, vendor-reported token usage, and
        round-trip latency in milliseconds.

    Raises:
        HTTPException: 404 if the model id is unknown; 400 for an empty
            message or missing api_key/base_url configuration; 502 when the
            upstream request fails or the vendor returns a non-200 response.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    # FIX: the original called .strip() on `request.api_key or model.api_key`,
    # which raises AttributeError (an opaque 500) when both are None/empty.
    # Validate explicitly and answer with a clear client error instead.
    api_key = ((request.api_key or model.api_key) or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail="API key is not configured for this model")

    # FIX: same crash for a model row whose base_url is unset — the original
    # `model.base_url.rstrip('/')` would raise AttributeError on None.
    base_url = (model.base_url or "").strip()
    if not base_url:
        raise HTTPException(status_code=400, detail="Base URL is not configured for this model")

    # Optional system prompt first, then the user turn (OpenAI message order).
    messages = []
    if request.system_prompt and request.system_prompt.strip():
        messages.append({"role": "system", "content": request.system_prompt.strip()})
    messages.append({"role": "user", "content": user_message})

    payload = {
        "model": model.model_name or "gpt-3.5-turbo",
        "messages": messages,
        "max_tokens": request.max_tokens or 512,
        # Per-request temperature wins; otherwise fall back to the stored
        # model default, then 0.7.
        "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
    }
    headers = {"Authorization": f"Bearer {api_key}"}

    start_time = time.time()
    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{base_url.rstrip('/')}/chat/completions",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        # Transport-level failure: DNS, refused connection, timeout, TLS, ...
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Prefer the vendor's structured error message when the body is JSON;
        # fall back to the raw response text.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    choices = result.get("choices", [])
    if choices:
        # OpenAI shape: choices[0].message.content; tolerate missing keys.
        reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )