Add embedding preview
This commit is contained in:
@@ -214,7 +214,7 @@ def preview_llm_model(
|
|||||||
request: LLMPreviewRequest,
|
request: LLMPreviewRequest,
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
"""预览 LLM 输出,基于 OpenAI-compatible /chat/completions。"""
|
"""预览模型输出,支持 text(chat) 与 embedding 两类模型。"""
|
||||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||||
if not model:
|
if not model:
|
||||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||||
@@ -223,24 +223,35 @@ def preview_llm_model(
|
|||||||
if not user_message:
|
if not user_message:
|
||||||
raise HTTPException(status_code=400, detail="Preview message cannot be empty")
|
raise HTTPException(status_code=400, detail="Preview message cannot be empty")
|
||||||
|
|
||||||
messages = []
|
model_id = model.model_name or "gpt-3.5-turbo"
|
||||||
if request.system_prompt and request.system_prompt.strip():
|
|
||||||
messages.append({"role": "system", "content": request.system_prompt.strip()})
|
|
||||||
messages.append({"role": "user", "content": user_message})
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": model.model_name or "gpt-3.5-turbo",
|
|
||||||
"messages": messages,
|
|
||||||
"max_tokens": request.max_tokens or 512,
|
|
||||||
"temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
|
|
||||||
}
|
|
||||||
headers = {"Authorization": f"Bearer {(request.api_key or model.api_key).strip()}"}
|
headers = {"Authorization": f"Bearer {(request.api_key or model.api_key).strip()}"}
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
endpoint = "/chat/completions"
|
||||||
|
payload = {}
|
||||||
|
|
||||||
|
if model.type == "embedding":
|
||||||
|
endpoint = "/embeddings"
|
||||||
|
payload = {
|
||||||
|
"model": model_id,
|
||||||
|
"input": user_message,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
messages = []
|
||||||
|
if request.system_prompt and request.system_prompt.strip():
|
||||||
|
messages.append({"role": "system", "content": request.system_prompt.strip()})
|
||||||
|
messages.append({"role": "user", "content": user_message})
|
||||||
|
payload = {
|
||||||
|
"model": model_id,
|
||||||
|
"messages": messages,
|
||||||
|
"max_tokens": request.max_tokens or 512,
|
||||||
|
"temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with httpx.Client(timeout=60.0) as client:
|
with httpx.Client(timeout=60.0) as client:
|
||||||
response = client.post(
|
response = client.post(
|
||||||
f"{model.base_url.rstrip('/')}/chat/completions",
|
f"{model.base_url.rstrip('/')}{endpoint}",
|
||||||
json=payload,
|
json=payload,
|
||||||
headers=headers
|
headers=headers
|
||||||
)
|
)
|
||||||
@@ -258,9 +269,23 @@ def preview_llm_model(
|
|||||||
|
|
||||||
result = response.json()
|
result = response.json()
|
||||||
reply = ""
|
reply = ""
|
||||||
choices = result.get("choices", [])
|
if model.type == "embedding":
|
||||||
if choices:
|
data_list = result.get("data", [])
|
||||||
reply = choices[0].get("message", {}).get("content", "") or ""
|
embedding = []
|
||||||
|
if data_list and isinstance(data_list, list):
|
||||||
|
embedding = data_list[0].get("embedding", []) or []
|
||||||
|
dims = len(embedding) if isinstance(embedding, list) else 0
|
||||||
|
preview_values = []
|
||||||
|
if isinstance(embedding, list):
|
||||||
|
preview_values = embedding[:8]
|
||||||
|
values_text = ", ".join(
|
||||||
|
[f"{float(v):.6f}" if isinstance(v, (float, int)) else str(v) for v in preview_values]
|
||||||
|
)
|
||||||
|
reply = f"Embedding generated successfully. dims={dims}. head=[{values_text}]"
|
||||||
|
else:
|
||||||
|
choices = result.get("choices", [])
|
||||||
|
if choices:
|
||||||
|
reply = choices[0].get("message", {}).get("content", "") or ""
|
||||||
|
|
||||||
return LLMPreviewResponse(
|
return LLMPreviewResponse(
|
||||||
success=bool(reply),
|
success=bool(reply),
|
||||||
|
|||||||
@@ -300,3 +300,53 @@ class TestLLMModelAPI:
|
|||||||
|
|
||||||
response = client.post(f"/api/llm/{model_id}/preview", json={"message": " "})
|
response = client.post(f"/api/llm/{model_id}/preview", json={"message": " "})
|
||||||
assert response.status_code == 400
|
assert response.status_code == 400
|
||||||
|
|
||||||
|
def test_preview_embedding_model_success(self, client, monkeypatch):
|
||||||
|
"""Test embedding model preview endpoint returns embedding summary."""
|
||||||
|
from app.routers import llm as llm_router
|
||||||
|
|
||||||
|
embedding_model_data = {
|
||||||
|
"id": "preview-emb",
|
||||||
|
"name": "Preview Embedding",
|
||||||
|
"vendor": "OpenAI",
|
||||||
|
"type": "embedding",
|
||||||
|
"base_url": "https://api.openai.com/v1",
|
||||||
|
"api_key": "test-key",
|
||||||
|
"model_name": "text-embedding-3-small"
|
||||||
|
}
|
||||||
|
create_response = client.post("/api/llm", json=embedding_model_data)
|
||||||
|
model_id = create_response.json()["id"]
|
||||||
|
|
||||||
|
class DummyResponse:
|
||||||
|
status_code = 200
|
||||||
|
|
||||||
|
def json(self):
|
||||||
|
return {"data": [{"embedding": [0.1, 0.2, 0.3, 0.4]}], "usage": {"total_tokens": 7}}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self):
|
||||||
|
return '{"ok":true}'
|
||||||
|
|
||||||
|
class DummyClient:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc, tb):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def post(self, url, json=None, headers=None):
|
||||||
|
assert url.endswith("/embeddings")
|
||||||
|
assert json["input"] == "hello embedding"
|
||||||
|
assert headers["Authorization"] == "Bearer test-key"
|
||||||
|
return DummyResponse()
|
||||||
|
|
||||||
|
monkeypatch.setattr(llm_router.httpx, "Client", DummyClient)
|
||||||
|
|
||||||
|
response = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello embedding"})
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
assert data["success"] is True
|
||||||
|
assert "dims=4" in data["reply"]
|
||||||
|
|||||||
@@ -144,7 +144,13 @@ export const LLMLibraryPage: React.FC = () => {
|
|||||||
<TableCell className="font-mono text-xs text-muted-foreground max-w-[240px] truncate">{model.baseUrl}</TableCell>
|
<TableCell className="font-mono text-xs text-muted-foreground max-w-[240px] truncate">{model.baseUrl}</TableCell>
|
||||||
<TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
|
<TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
|
||||||
<TableCell className="text-right">
|
<TableCell className="text-right">
|
||||||
<Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)} disabled={model.type !== 'text'} title={model.type !== 'text' ? '仅 text 模型可预览' : '预览模型'}>
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="icon"
|
||||||
|
onClick={() => setPreviewingModel(model)}
|
||||||
|
disabled={model.type === 'rerank'}
|
||||||
|
title={model.type === 'rerank' ? '暂不支持 rerank 预览' : (model.type === 'embedding' ? '预览 embedding 向量' : '预览模型')}
|
||||||
|
>
|
||||||
<Play className="h-4 w-4" />
|
<Play className="h-4 w-4" />
|
||||||
</Button>
|
</Button>
|
||||||
<Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
|
<Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
|
||||||
@@ -358,6 +364,7 @@ const LLMPreviewModal: React.FC<{
|
|||||||
onClose: () => void;
|
onClose: () => void;
|
||||||
model: LLMModel | null;
|
model: LLMModel | null;
|
||||||
}> = ({ isOpen, onClose, model }) => {
|
}> = ({ isOpen, onClose, model }) => {
|
||||||
|
const isEmbeddingModel = model?.type === 'embedding';
|
||||||
const [systemPrompt, setSystemPrompt] = useState('You are a concise helpful assistant.');
|
const [systemPrompt, setSystemPrompt] = useState('You are a concise helpful assistant.');
|
||||||
const [message, setMessage] = useState('Hello, please introduce yourself in one sentence.');
|
const [message, setMessage] = useState('Hello, please introduce yourself in one sentence.');
|
||||||
const [temperature, setTemperature] = useState(0.7);
|
const [temperature, setTemperature] = useState(0.7);
|
||||||
@@ -419,28 +426,29 @@ const LLMPreviewModal: React.FC<{
|
|||||||
value={systemPrompt}
|
value={systemPrompt}
|
||||||
onChange={(e) => setSystemPrompt(e.target.value)}
|
onChange={(e) => setSystemPrompt(e.target.value)}
|
||||||
className="flex min-h-[70px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
|
className="flex min-h-[70px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
|
||||||
placeholder="可选系统提示词"
|
placeholder={isEmbeddingModel ? 'embedding 预览无需 system prompt(可留空)' : '可选系统提示词'}
|
||||||
|
disabled={isEmbeddingModel}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="space-y-1.5">
|
<div className="space-y-1.5">
|
||||||
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">User Message</label>
|
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">{isEmbeddingModel ? 'Input Text' : 'User Message'}</label>
|
||||||
<textarea
|
<textarea
|
||||||
value={message}
|
value={message}
|
||||||
onChange={(e) => setMessage(e.target.value)}
|
onChange={(e) => setMessage(e.target.value)}
|
||||||
className="flex min-h-[90px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
|
className="flex min-h-[90px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
|
||||||
placeholder="输入用户消息"
|
placeholder={isEmbeddingModel ? '输入需要生成向量的文本' : '输入用户消息'}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="grid grid-cols-2 gap-4">
|
<div className="grid grid-cols-2 gap-4">
|
||||||
<div className="space-y-1.5">
|
<div className="space-y-1.5">
|
||||||
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Temperature</label>
|
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Temperature</label>
|
||||||
<Input type="number" min={0} max={2} step={0.1} value={temperature} onChange={(e) => setTemperature(parseFloat(e.target.value || '0'))} />
|
<Input type="number" min={0} max={2} step={0.1} value={temperature} onChange={(e) => setTemperature(parseFloat(e.target.value || '0'))} disabled={isEmbeddingModel} />
|
||||||
</div>
|
</div>
|
||||||
<div className="space-y-1.5">
|
<div className="space-y-1.5">
|
||||||
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Max Tokens</label>
|
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Max Tokens</label>
|
||||||
<Input type="number" min={1} value={maxTokens} onChange={(e) => setMaxTokens(parseInt(e.target.value || '1', 10))} />
|
<Input type="number" min={1} value={maxTokens} onChange={(e) => setMaxTokens(parseInt(e.target.value || '1', 10))} disabled={isEmbeddingModel} />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user