Add embedding preview
This commit is contained in:
@@ -214,7 +214,7 @@ def preview_llm_model(
|
||||
request: LLMPreviewRequest,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""预览 LLM 输出,基于 OpenAI-compatible /chat/completions。"""
|
||||
"""预览模型输出,支持 text(chat) 与 embedding 两类模型。"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
@@ -223,24 +223,35 @@ def preview_llm_model(
|
||||
if not user_message:
|
||||
raise HTTPException(status_code=400, detail="Preview message cannot be empty")
|
||||
|
||||
messages = []
|
||||
if request.system_prompt and request.system_prompt.strip():
|
||||
messages.append({"role": "system", "content": request.system_prompt.strip()})
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
|
||||
payload = {
|
||||
"model": model.model_name or "gpt-3.5-turbo",
|
||||
"messages": messages,
|
||||
"max_tokens": request.max_tokens or 512,
|
||||
"temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
|
||||
}
|
||||
model_id = model.model_name or "gpt-3.5-turbo"
|
||||
headers = {"Authorization": f"Bearer {(request.api_key or model.api_key).strip()}"}
|
||||
|
||||
start_time = time.time()
|
||||
endpoint = "/chat/completions"
|
||||
payload = {}
|
||||
|
||||
if model.type == "embedding":
|
||||
endpoint = "/embeddings"
|
||||
payload = {
|
||||
"model": model_id,
|
||||
"input": user_message,
|
||||
}
|
||||
else:
|
||||
messages = []
|
||||
if request.system_prompt and request.system_prompt.strip():
|
||||
messages.append({"role": "system", "content": request.system_prompt.strip()})
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
payload = {
|
||||
"model": model_id,
|
||||
"messages": messages,
|
||||
"max_tokens": request.max_tokens or 512,
|
||||
"temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url.rstrip('/')}/chat/completions",
|
||||
f"{model.base_url.rstrip('/')}{endpoint}",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
@@ -258,9 +269,23 @@ def preview_llm_model(
|
||||
|
||||
result = response.json()
|
||||
reply = ""
|
||||
choices = result.get("choices", [])
|
||||
if choices:
|
||||
reply = choices[0].get("message", {}).get("content", "") or ""
|
||||
if model.type == "embedding":
|
||||
data_list = result.get("data", [])
|
||||
embedding = []
|
||||
if data_list and isinstance(data_list, list):
|
||||
embedding = data_list[0].get("embedding", []) or []
|
||||
dims = len(embedding) if isinstance(embedding, list) else 0
|
||||
preview_values = []
|
||||
if isinstance(embedding, list):
|
||||
preview_values = embedding[:8]
|
||||
values_text = ", ".join(
|
||||
[f"{float(v):.6f}" if isinstance(v, (float, int)) else str(v) for v in preview_values]
|
||||
)
|
||||
reply = f"Embedding generated successfully. dims={dims}. head=[{values_text}]"
|
||||
else:
|
||||
choices = result.get("choices", [])
|
||||
if choices:
|
||||
reply = choices[0].get("message", {}).get("content", "") or ""
|
||||
|
||||
return LLMPreviewResponse(
|
||||
success=bool(reply),
|
||||
|
||||
Reference in New Issue
Block a user