Update llm library web interface
This commit is contained in:
@@ -10,7 +10,7 @@ from ..db import get_db
|
||||
from ..models import LLMModel
|
||||
from ..schemas import (
|
||||
LLMModelCreate, LLMModelUpdate, LLMModelOut,
|
||||
LLMModelTestResponse
|
||||
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/llm", tags=["LLM Models"])
|
||||
@@ -204,3 +204,66 @@ def chat_with_llm(
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview LLM output via an OpenAI-compatible ``/chat/completions`` call.

    Looks up the stored model configuration, sends a one-shot chat request
    (optional system prompt + one user message), and returns the reply text
    together with token usage and round-trip latency.

    Args:
        id: Primary key of the ``LLMModel`` row to preview.
        request: Preview parameters — ``message`` (required), optional
            ``system_prompt``, and optional ``api_key`` / ``temperature`` /
            ``max_tokens`` overrides of the stored configuration.
        db: SQLAlchemy session injected by FastAPI.

    Returns:
        LLMPreviewResponse: reply content, vendor ``usage`` block (if any),
        latency in milliseconds, and an error string when no content came back.

    Raises:
        HTTPException: 404 if the model id is unknown; 400 for an empty
            message or missing credentials/base URL; 502 when the upstream
            request fails or the vendor returns a non-200 status.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    # Bug fix: the original called .strip() on (request.api_key or model.api_key)
    # unconditionally, which raised AttributeError (an opaque 500) whenever both
    # values were unset. Validate and return a clear 400 instead.
    api_key = ((request.api_key or model.api_key) or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail="No API key configured for this model")
    # Same class of bug: a NULL base_url would crash on .rstrip('/') below.
    if not model.base_url:
        raise HTTPException(status_code=400, detail="No base URL configured for this model")

    messages = []
    if request.system_prompt and request.system_prompt.strip():
        messages.append({"role": "system", "content": request.system_prompt.strip()})
    messages.append({"role": "user", "content": user_message})

    payload = {
        "model": model.model_name or "gpt-3.5-turbo",
        "messages": messages,
        "max_tokens": request.max_tokens or 512,
        # Per-request temperature override wins; otherwise fall back to the
        # stored model temperature, defaulting to 0.7 when unset.
        "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
    }
    headers = {"Authorization": f"Bearer {api_key}"}

    start_time = time.time()
    try:
        # One-shot synchronous call; 60 s covers slow generations.
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url.rstrip('/')}/chat/completions",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        # Network/transport failure — surface as a bad-gateway, keep the cause.
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Prefer the vendor's structured error message; fall back to raw text.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    choices = result.get("choices", [])
    if choices:
        # OpenAI shape: choices[0].message.content; tolerate missing keys.
        reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )
|
||||
|
||||
Reference in New Issue
Block a user