Update asr library preview

This commit is contained in:
Xin Wang
2026-02-08 23:38:34 +08:00
parent 97e3236e76
commit 4bf2f788ad
5 changed files with 781 additions and 183 deletions

View File

@@ -1,12 +1,11 @@
from fastapi import APIRouter, Depends, HTTPException import os
from sqlalchemy.orm import Session
from typing import List, Optional
import uuid
import httpx
import time import time
import base64 import uuid
import json from typing import List, Optional
from datetime import datetime
import httpx
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from sqlalchemy.orm import Session
from ..db import get_db from ..db import get_db
from ..models import ASRModel from ..models import ASRModel
@@ -17,6 +16,18 @@ from ..schemas import (
router = APIRouter(prefix="/asr", tags=["ASR Models"]) router = APIRouter(prefix="/asr", tags=["ASR Models"])
SILICONFLOW_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
def _is_siliconflow_vendor(vendor: str) -> bool:
return (vendor or "").strip().lower() in {"siliconflow", "硅基流动"}
def _default_asr_model(vendor: str) -> str:
    """Pick the fallback model identifier for a vendor.

    SiliconFlow vendors fall back to ``SILICONFLOW_DEFAULT_ASR_MODEL``; every
    other vendor falls back to ``"whisper-1"``.
    """
    return SILICONFLOW_DEFAULT_ASR_MODEL if _is_siliconflow_vendor(vendor) else "whisper-1"
# ============ ASR Models CRUD ============ # ============ ASR Models CRUD ============
@router.get("") @router.get("")
@@ -219,3 +230,99 @@ def transcribe_audio(
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
def _vendor_error_detail(response: "httpx.Response") -> str:
    """Extract a readable error message from a non-200 vendor response.

    Checks, in order, ``error.message`` (object-style error), a plain-string
    ``error`` field and a top-level ``detail`` field, and falls back to the
    raw response text when none of them is usable.
    """
    fallback = response.text
    try:
        body = response.json()
    except ValueError:
        # The body is not valid JSON; the raw text is the best we have.
        return fallback
    if not isinstance(body, dict):
        return fallback
    error = body.get("error")
    message = error.get("message") if isinstance(error, dict) else error
    return str(message or body.get("detail") or fallback)


def _parse_transcription_payload(
    payload: object,
    requested_language: Optional[str],
    fallback_language: Optional[str],
) -> tuple:
    """Pull ``(transcript, language, confidence)`` out of a vendor payload.

    The transcript is read from ``text`` or ``transcript``. The language is
    the vendor-reported one, else the requested one, else the model's own; it
    stays ``None`` when none is known instead of becoming the string
    ``"None"``. Confidence is ``None`` unless the payload carries a value
    that converts to ``float``.
    """
    if not isinstance(payload, dict):
        return "", fallback_language, None

    transcript = str(payload.get("text") or payload.get("transcript") or "")
    language = payload.get("language") or requested_language or fallback_language
    confidence = None
    raw_confidence = payload.get("confidence")
    if raw_confidence is not None:
        try:
            confidence = float(raw_confidence)
        except (TypeError, ValueError):
            confidence = None
    return transcript, (str(language) if language is not None else None), confidence


@router.post("/{id}/preview", response_model=ASRTestResponse)
async def preview_asr_model(
    id: str,
    file: UploadFile = File(...),
    language: Optional[str] = Form(None),
    api_key: Optional[str] = Form(None),
    db: Session = Depends(get_db),
):
    """Preview an ASR model by transcribing an uploaded audio file.

    The upload is forwarded as multipart form data to the model's
    OpenAI-compatible ``{base_url}/audio/transcriptions`` endpoint.

    Args:
        id: Identifier of the stored ASR model (path parameter).
        file: Audio upload; its content type must start with ``audio/``.
        language: Optional language hint forwarded to the vendor.
        api_key: Optional key that overrides the one stored on the model.
        db: Database session used to look up the model.

    Returns:
        An ``ASRTestResponse`` with the transcript, language, confidence
        (when the vendor reports one) and the elapsed time in milliseconds.

    Raises:
        HTTPException: 404 when the model does not exist; 400 for a missing,
            non-audio or empty upload, or a missing API key / base URL; 502
            when the vendor request fails or returns a non-200 status.
    """
    # Local import keeps this block self-contained; used to off-load the
    # blocking HTTP call from the event loop.
    import asyncio

    model = db.query(ASRModel).filter(ASRModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="ASR Model not found")

    if not file:
        raise HTTPException(status_code=400, detail="Audio file is required")

    filename = file.filename or "preview.wav"
    content_type = file.content_type or "application/octet-stream"
    if not content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Only audio files are supported")

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    # Key precedence: request override, then stored key, then (SiliconFlow
    # only) the SILICONFLOW_API_KEY environment variable.
    effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
    if not effective_api_key and _is_siliconflow_vendor(model.vendor):
        effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
    if not effective_api_key:
        raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")

    base_url = (model.base_url or "").strip().rstrip("/")
    if not base_url:
        raise HTTPException(status_code=400, detail=f"Base URL is required for ASR model: {model.name}")

    selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
    data = {"model": selected_model}
    effective_language = (language or "").strip() or None
    if effective_language:
        data["language"] = effective_language
    if model.hotwords:
        # Hotwords are passed through the transcription "prompt" field.
        data["prompt"] = " ".join(model.hotwords)

    headers = {"Authorization": f"Bearer {effective_api_key}"}
    files = {"file": (filename, audio_bytes, content_type)}

    def _post_transcription():
        # Synchronous on purpose: executed in a worker thread below.
        with httpx.Client(timeout=90.0) as client:
            return client.post(
                f"{base_url}/audio/transcriptions",
                headers=headers,
                data=data,
                files=files,
            )

    # perf_counter is monotonic, so the latency cannot go negative or jump
    # when the system clock is adjusted.
    start_time = time.perf_counter()
    try:
        # The request may take up to 90 s; running it in a thread keeps the
        # event loop free to serve other requests meanwhile.
        response = await asyncio.to_thread(_post_transcription)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"ASR request failed: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(status_code=502, detail=f"ASR vendor error: {_vendor_error_detail(response)}")

    try:
        payload = response.json()
    except ValueError:
        # Non-JSON success body: treat the raw text as the transcript.
        payload = {"text": response.text}

    transcript, response_language, confidence = _parse_transcription_payload(
        payload, effective_language, model.language
    )

    latency_ms = int((time.perf_counter() - start_time) * 1000)
    return ASRTestResponse(
        success=bool(transcript),
        transcript=transcript,
        language=response_language,
        confidence=confidence,
        latency_ms=latency_ms,
        message=None if transcript else "No transcript in response",
    )

View File

@@ -287,3 +287,61 @@ class TestASRModelAPI:
response = client.post("/api/asr", json=data) response = client.post("/api/asr", json=data)
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["vendor"] == vendor assert response.json()["vendor"] == vendor
def test_preview_asr_model_success(self, client, sample_asr_model_data, monkeypatch):
    """Preview endpoint forwards the upload to the vendor API and maps the reply.

    ``httpx.Client`` is replaced with a stub that records the outgoing call.
    The recorded arguments are asserted here rather than inside the stub,
    because the endpoint catches every exception raised by the HTTP call and
    would turn a failed in-stub assertion into an opaque 502.
    """
    from app.routers import asr as asr_router

    create_response = client.post("/api/asr", json=sample_asr_model_data)
    assert create_response.status_code == 200
    model_id = create_response.json()["id"]

    captured = {}

    class DummyResponse:
        status_code = 200
        text = '{"text":"ok"}'

        def json(self):
            return {"text": "你好,这是测试转写", "language": "zh", "confidence": 0.98}

    class DummyClient:
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, headers=None, data=None, files=None):
            # Record only; expectations are checked in the test body.
            captured.update(url=url, headers=headers, data=data, files=files)
            return DummyResponse()

    monkeypatch.setattr(asr_router.httpx, "Client", DummyClient)

    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.wav", b"fake-wav-bytes", "audio/wav")},
    )

    assert response.status_code == 200, response.text

    # Outgoing vendor request.
    assert captured["url"].endswith("/audio/transcriptions")
    assert captured["headers"]["Authorization"] == f"Bearer {sample_asr_model_data['api_key']}"
    assert captured["data"]["model"] == sample_asr_model_data["model_name"]
    assert captured["files"]["file"][0] == "sample.wav"

    # Mapped response.
    payload = response.json()
    assert payload["success"] is True
    assert payload["transcript"] == "你好,这是测试转写"
    assert payload["language"] == "zh"
    assert payload["confidence"] == 0.98
def test_preview_asr_model_reject_non_audio(self, client, sample_asr_model_data):
    """Uploading a non-audio content type is rejected with HTTP 400."""
    created = client.post("/api/asr", json=sample_asr_model_data)
    preview_url = f"/api/asr/{created.json()['id']}/preview"
    upload = {"file": ("sample.txt", b"text-data", "text/plain")}

    rejected = client.post(preview_url, files=upload)

    assert rejected.status_code == 400
    assert "Only audio files are supported" in rejected.text

View File

@@ -1,103 +1,122 @@
import React, { useEffect, useRef, useState } from 'react';
import React, { useState } from 'react'; import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react';
import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages } from 'lucide-react';
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI'; import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
import { mockASRModels } from '../services/mockData';
import { ASRModel } from '../types'; import { ASRModel } from '../types';
import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi';
// Mask an API key for display. At most 7 characters are revealed (3 leading +
// 4 trailing), and only when the key is long enough that those 7 characters
// are a minority of it; missing or shorter keys are fully masked so a short
// key is never shown almost in full.
const maskApiKey = (key?: string) => {
  const MIN_REVEALABLE_LENGTH = 16;
  if (!key || key.length < MIN_REVEALABLE_LENGTH) return '********';
  return `${key.slice(0, 3)}****${key.slice(-4)}`;
};
// Split free-form hotword input into a clean list. Entries may be separated
// by a newline, an ASCII comma, a full-width comma (,) or an enumeration
// comma (、); surrounding whitespace is trimmed and empty entries are dropped.
const parseHotwords = (value: string): string[] => {
  return value
    .split(/[\n,,、]/)
    .map((item) => item.trim())
    .filter(Boolean);
};
// Render a hotword list as the comma-separated text shown in the form input;
// a missing list renders as an empty string.
const toHotwordsValue = (hotwords?: string[]): string => {
  if (!hotwords) return '';
  return hotwords.join(', ');
};
export const ASRLibraryPage: React.FC = () => { export const ASRLibraryPage: React.FC = () => {
const [models, setModels] = useState<ASRModel[]>(mockASRModels); const [models, setModels] = useState<ASRModel[]>([]);
const [searchTerm, setSearchTerm] = useState(''); const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<string>('all'); const [vendorFilter, setVendorFilter] = useState<string>('all');
const [langFilter, setLangFilter] = useState<string>('all'); const [langFilter, setLangFilter] = useState<string>('all');
const [isAddModalOpen, setIsAddModalOpen] = useState(false); const [isAddModalOpen, setIsAddModalOpen] = useState(false);
const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
const [previewingModel, setPreviewingModel] = useState<ASRModel | null>(null);
const [isLoading, setIsLoading] = useState(true);
// Form State const loadModels = async () => {
const [newModel, setNewModel] = useState<Partial<ASRModel>>({ setIsLoading(true);
vendor: 'OpenAI Compatible', try {
language: 'zh' setModels(await fetchASRModels());
}); } catch (error) {
console.error(error);
setModels([]);
} finally {
setIsLoading(false);
}
};
const filteredModels = models.filter(m => { useEffect(() => {
const matchesSearch = m.name.toLowerCase().includes(searchTerm.toLowerCase()); loadModels();
}, []);
const filteredModels = models.filter((m) => {
const q = searchTerm.toLowerCase();
const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q);
const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter; const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter;
const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual'); const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual');
return matchesSearch && matchesVendor && matchesLang; return matchesSearch && matchesVendor && matchesLang;
}); });
const handleAddModel = () => { const handleCreate = async (data: Partial<ASRModel>) => {
if (!newModel.name || !newModel.baseUrl || !newModel.apiKey) { const created = await createASRModel(data);
alert("请填写完整信息"); setModels((prev) => [created, ...prev]);
return;
}
const model: ASRModel = {
id: `asr_${Date.now()}`,
name: newModel.name,
vendor: newModel.vendor as 'OpenAI Compatible',
language: newModel.language || 'zh',
baseUrl: newModel.baseUrl,
apiKey: newModel.apiKey
};
setModels([model, ...models]);
setIsAddModalOpen(false); setIsAddModalOpen(false);
setNewModel({ vendor: 'OpenAI Compatible', language: 'zh', name: '', baseUrl: '', apiKey: '' });
}; };
const handleDeleteModel = (id: string) => { const handleUpdate = async (id: string, data: Partial<ASRModel>) => {
if (confirm('确认删除该语音识别模型吗?')) { const updated = await updateASRModel(id, data);
setModels(prev => prev.filter(m => m.id !== id)); setModels((prev) => prev.map((m) => (m.id === id ? updated : m)));
} setEditingModel(null);
}; };
const maskApiKey = (key: string) => { const handleDelete = async (id: string) => {
if (!key || key.length < 8) return '********'; if (!confirm('确认删除该语音识别模型吗?')) return;
return `${key.substring(0, 3)}****${key.substring(key.length - 4)}`; await deleteASRModel(id);
setModels((prev) => prev.filter((m) => m.id !== id));
}; };
const vendorOptions = Array.from(new Set(models.map((m) => m.vendor).filter(Boolean)));
return ( return (
<div className="space-y-6 animate-in fade-in py-4 pb-10"> <div className="space-y-6 animate-in fade-in py-4 pb-10">
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
<h1 className="text-2xl font-bold tracking-tight text-white"></h1> <h1 className="text-2xl font-bold tracking-tight text-white"></h1>
<Button onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]"> <Button onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
<Plus className="mr-2 h-4 w-4" /> <Plus className="mr-2 h-4 w-4" />
</Button> </Button>
</div> </div>
<div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm"> <div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
<div className="relative col-span-1 md:col-span-2"> <div className="relative col-span-1 md:col-span-2">
<Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" /> <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
<Input <Input
placeholder="搜索模型名称..." placeholder="搜索模型名称/Model Name..."
className="pl-9 border-0 bg-white/5" className="pl-9 border-0 bg-white/5"
value={searchTerm} value={searchTerm}
onChange={e => setSearchTerm(e.target.value)} onChange={(e) => setSearchTerm(e.target.value)}
/> />
</div> </div>
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
<Filter className="h-4 w-4 text-muted-foreground" /> <Filter className="h-4 w-4 text-muted-foreground" />
<select <select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground" className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
value={vendorFilter} value={vendorFilter}
onChange={(e) => setVendorFilter(e.target.value)} onChange={(e) => setVendorFilter(e.target.value)}
> >
<option value="all"></option> <option value="all"></option>
<option value="OpenAI Compatible">OpenAI Compatible</option> {vendorOptions.map((vendor) => (
</select> <option key={vendor} value={vendor}>{vendor}</option>
</div> ))}
<div className="flex items-center space-x-2"> </select>
<select </div>
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground" <div className="flex items-center space-x-2">
value={langFilter} <select
onChange={(e) => setLangFilter(e.target.value)} className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
> value={langFilter}
<option value="all"></option> onChange={(e) => setLangFilter(e.target.value)}
<option value="zh"> (Chinese)</option> >
<option value="en"> (English)</option> <option value="all"></option>
<option value="Multi-lingual"> (Multi-lingual)</option> <option value="zh"> (Chinese)</option>
</select> <option value="en"> (English)</option>
</div> <option value="Multi-lingual"> (Multi-lingual)</option>
</select>
</div>
</div> </div>
<div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden"> <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
@@ -105,131 +124,435 @@ export const ASRLibraryPage: React.FC = () => {
<TableHeader> <TableHeader>
<TableRow> <TableRow>
<TableHead></TableHead> <TableHead></TableHead>
<TableHead></TableHead> <TableHead></TableHead>
<TableHead></TableHead> <TableHead></TableHead>
<TableHead></TableHead>
<TableHead>Base URL</TableHead> <TableHead>Base URL</TableHead>
<TableHead>API Key</TableHead> <TableHead>API Key</TableHead>
<TableHead className="text-right"></TableHead> <TableHead className="text-right"></TableHead>
</TableRow> </TableRow>
</TableHeader> </TableHeader>
<tbody> <tbody>
{filteredModels.map(model => ( {!isLoading && filteredModels.map((model) => (
<TableRow key={model.id}> <TableRow key={model.id}>
<TableCell className="font-medium text-white flex items-center"> <TableCell className="font-medium text-white">
<Ear className="w-4 h-4 mr-2 text-primary" /> <div className="flex flex-col">
{model.name} <span className="flex items-center">
</TableCell> <Ear className="w-4 h-4 mr-2 text-primary" />
<TableCell> {model.name}
<Badge variant="outline">{model.vendor}</Badge> </span>
</TableCell> {model.hotwords && model.hotwords.length > 0 && (
<TableCell> <span className="text-xs text-muted-foreground">: {model.hotwords.join(', ')}</span>
<Badge variant="default" className="bg-purple-500/10 text-purple-400 border-purple-500/20"> )}
{model.language} </div>
</Badge>
</TableCell>
<TableCell className="font-mono text-xs text-muted-foreground">
{model.baseUrl}
</TableCell>
<TableCell className="font-mono text-xs text-muted-foreground">
{maskApiKey(model.apiKey)}
</TableCell> </TableCell>
<TableCell><Badge variant="outline">{model.vendor}</Badge></TableCell>
<TableCell>{model.language}</TableCell>
<TableCell className="font-mono text-xs text-muted-foreground">{model.modelName || '-'}</TableCell>
<TableCell className="font-mono text-xs text-muted-foreground max-w-[220px] truncate">{model.baseUrl}</TableCell>
<TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
<TableCell className="text-right"> <TableCell className="text-right">
<Button <Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)}>
variant="ghost" <Ear className="h-4 w-4" />
size="icon" </Button>
onClick={() => handleDeleteModel(model.id)} <Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
className="text-muted-foreground hover:text-destructive transition-colors" <Pencil className="h-4 w-4" />
> </Button>
<Trash2 className="h-4 w-4" /> <Button variant="ghost" size="icon" onClick={() => handleDelete(model.id)} className="text-red-400">
</Button> <Trash2 className="h-4 w-4" />
</Button>
</TableCell> </TableCell>
</TableRow> </TableRow>
))} ))}
{filteredModels.length === 0 && ( {!isLoading && filteredModels.length === 0 && (
<TableRow> <TableRow>
<TableCell colSpan={6} className="text-center py-8 text-muted-foreground"></TableCell> <TableCell colSpan={7} className="text-center py-8 text-muted-foreground"></TableCell>
</TableRow> </TableRow>
)} )}
{isLoading && (
<TableRow>
<TableCell colSpan={7} className="text-center py-8 text-muted-foreground">...</TableCell>
</TableRow>
)}
</tbody> </tbody>
</table> </table>
</div> </div>
<Dialog <ASRModelModal
isOpen={isAddModalOpen} isOpen={isAddModalOpen}
onClose={() => setIsAddModalOpen(false)} onClose={() => setIsAddModalOpen(false)}
title="添加语音识别模型" onSubmit={handleCreate}
footer={ />
<>
<Button variant="ghost" onClick={() => setIsAddModalOpen(false)}></Button>
<Button onClick={handleAddModel}></Button>
</>
}
>
<div className="space-y-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Interface Type)</label>
<select
className="flex h-10 w-full rounded-md border border-white/10 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground appearance-none cursor-pointer [&>option]:bg-card"
value={newModel.vendor}
onChange={e => setNewModel({...newModel, vendor: e.target.value as any})}
>
<option value="OpenAI Compatible">OpenAI Compatible</option>
</select>
</div>
<div className="space-y-1.5"> <ASRModelModal
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Language)</label> isOpen={!!editingModel}
<div className="flex bg-white/5 p-1 rounded-lg border border-white/10"> onClose={() => setEditingModel(null)}
{(['zh', 'en', 'Multi-lingual'] as const).map(l => ( onSubmit={(data) => handleUpdate(editingModel!.id, data)}
<button initialModel={editingModel || undefined}
key={l} />
onClick={() => setNewModel({...newModel, language: l})}
className={`flex-1 flex items-center justify-center py-1.5 text-xs font-bold rounded-md transition-all ${newModel.language === l ? 'bg-primary text-primary-foreground shadow-lg' : 'text-muted-foreground hover:text-foreground'}`}
>
{l === 'zh' && <span className="mr-1">🇨🇳</span>}
{l === 'en' && <span className="mr-1">🇺🇸</span>}
{l === 'Multi-lingual' && <Globe className="w-3 h-3 mr-1.5" />}
{l === 'zh' ? '中文' : l === 'en' ? '英文' : '多语言'}
</button>
))}
</div>
</div>
<div className="space-y-1.5"> <ASRPreviewModal
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Model Name)</label> isOpen={!!previewingModel}
<Input onClose={() => setPreviewingModel(null)}
value={newModel.name} model={previewingModel}
onChange={e => setNewModel({...newModel, name: e.target.value})} />
placeholder="例如: whisper-1, funasr"
/>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
<Server className="w-3 h-3 mr-1.5" /> Base URL
</label>
<Input
value={newModel.baseUrl}
onChange={e => setNewModel({...newModel, baseUrl: e.target.value})}
placeholder="https://api.openai.com/v1"
className="font-mono text-xs"
/>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
<Key className="w-3 h-3 mr-1.5" /> API Key
</label>
<Input
type="password"
value={newModel.apiKey}
onChange={e => setNewModel({...newModel, apiKey: e.target.value})}
placeholder="sk-..."
className="font-mono text-xs"
/>
</div>
</div>
</Dialog>
</div> </div>
); );
}; };
// Vendor choices the form always offers.
const ASR_VENDOR_OPTIONS = ['OpenAI Compatible', 'SiliconFlow'];

// Values used for a brand-new model, and as fallbacks for fields that are
// missing on the model being edited.
const ASR_MODEL_FORM_DEFAULTS = {
  name: '',
  vendor: 'OpenAI Compatible',
  language: 'zh',
  modelName: 'FunAudioLLM/SenseVoiceSmall',
  baseUrl: 'https://api.siliconflow.cn/v1',
  apiKey: '',
  hotwords: '',
  enablePunctuation: true,
  enableNormalization: true,
  enabled: true,
};

// Create/edit dialog for an ASR model. When `initialModel` is given the form
// is pre-filled from it (edit mode); otherwise it starts from the defaults
// (create mode). `onSubmit` receives the trimmed form values.
const ASRModelModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSubmit: (model: Partial<ASRModel>) => Promise<void>;
  initialModel?: ASRModel;
}> = ({ isOpen, onClose, onSubmit, initialModel }) => {
  const [name, setName] = useState(ASR_MODEL_FORM_DEFAULTS.name);
  const [vendor, setVendor] = useState(ASR_MODEL_FORM_DEFAULTS.vendor);
  const [language, setLanguage] = useState(ASR_MODEL_FORM_DEFAULTS.language);
  const [modelName, setModelName] = useState(ASR_MODEL_FORM_DEFAULTS.modelName);
  const [baseUrl, setBaseUrl] = useState(ASR_MODEL_FORM_DEFAULTS.baseUrl);
  const [apiKey, setApiKey] = useState(ASR_MODEL_FORM_DEFAULTS.apiKey);
  const [hotwords, setHotwords] = useState(ASR_MODEL_FORM_DEFAULTS.hotwords);
  const [enablePunctuation, setEnablePunctuation] = useState(ASR_MODEL_FORM_DEFAULTS.enablePunctuation);
  const [enableNormalization, setEnableNormalization] = useState(ASR_MODEL_FORM_DEFAULTS.enableNormalization);
  const [enabled, setEnabled] = useState(ASR_MODEL_FORM_DEFAULTS.enabled);
  const [saving, setSaving] = useState(false);

  // Re-initialise the form every time the dialog opens (or the edited model
  // changes) so values from a previous session never leak into the next one.
  useEffect(() => {
    if (!isOpen) return;
    const defaults = ASR_MODEL_FORM_DEFAULTS;
    setName(initialModel?.name || defaults.name);
    setVendor(initialModel?.vendor || defaults.vendor);
    setLanguage(initialModel?.language || defaults.language);
    setModelName(initialModel?.modelName || defaults.modelName);
    setBaseUrl(initialModel?.baseUrl || defaults.baseUrl);
    setApiKey(initialModel?.apiKey || defaults.apiKey);
    setHotwords(toHotwordsValue(initialModel?.hotwords));
    setEnablePunctuation(initialModel?.enablePunctuation ?? defaults.enablePunctuation);
    setEnableNormalization(initialModel?.enableNormalization ?? defaults.enableNormalization);
    setEnabled(initialModel?.enabled ?? defaults.enabled);
  }, [initialModel, isOpen]);

  // Keep the controlled <select> consistent: a model saved with a vendor that
  // is not one of the built-in choices still gets a matching option, so the
  // dropdown shows the real vendor instead of silently displaying another one.
  const vendorChoices = ASR_VENDOR_OPTIONS.includes(vendor)
    ? ASR_VENDOR_OPTIONS
    : [...ASR_VENDOR_OPTIONS, vendor];

  const handleSubmit = async () => {
    // Ignore repeated clicks while a save is already in flight.
    if (saving) return;

    if (!name.trim()) {
      alert('请填写模型名称');
      return;
    }
    if (!baseUrl.trim()) {
      alert('请填写 Base URL');
      return;
    }
    if (!apiKey.trim()) {
      alert('请填写 API Key');
      return;
    }

    try {
      setSaving(true);
      await onSubmit({
        name: name.trim(),
        vendor: vendor.trim(),
        language,
        modelName: modelName.trim(),
        baseUrl: baseUrl.trim(),
        apiKey: apiKey.trim(),
        hotwords: parseHotwords(hotwords),
        enablePunctuation,
        enableNormalization,
        enabled,
      });
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setSaving(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={initialModel ? '编辑语音识别模型' : '添加语音识别模型'}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}></Button>
          <Button onClick={handleSubmit} disabled={saving}>{saving ? '保存中...' : (initialModel ? '保存修改' : '确认添加')}</Button>
        </>
      }
    >
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: SenseVoice CN" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
            <select
              className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
              value={vendor}
              onChange={(e) => setVendor(e.target.value)}
            >
              {vendorChoices.map((option) => (
                <option key={option} value={option}>{option}</option>
              ))}
            </select>
          </div>

          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Languages className="w-3 h-3 mr-1.5" /></label>
            <select
              className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
              value={language}
              onChange={(e) => setLanguage(e.target.value)}
            >
              <option value="zh"> (Chinese)</option>
              <option value="en"> (English)</option>
              <option value="Multi-lingual"> (Multi-lingual)</option>
            </select>
          </div>
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Model Name</label>
          <Input value={modelName} onChange={(e) => setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Server className="w-3 h-3 mr-1.5" />Base URL</label>
            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Key className="w-3 h-3 mr-1.5" />API Key</label>
            <Input value={apiKey} onChange={(e) => setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
          </div>
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (comma separated)</label>
          <Input value={hotwords} onChange={(e) => setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-3 gap-2">
          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
            <input type="checkbox" checked={enablePunctuation} onChange={(e) => setEnablePunctuation(e.target.checked)} />
            <span></span>
          </label>
          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
            <input type="checkbox" checked={enableNormalization} onChange={(e) => setEnableNormalization(e.target.checked)} />
            <span></span>
          </label>
          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
            <input type="checkbox" checked={enabled} onChange={(e) => setEnabled(e.target.checked)} />
            <span></span>
          </label>
        </div>
      </div>
    </Dialog>
  );
};
// File extension to use for a microphone recording, keyed by its base MIME type.
const RECORDING_EXTENSIONS: Record<string, string> = {
  'audio/webm': 'webm',
  'audio/ogg': 'ogg',
  'audio/mp4': 'mp4',
  'audio/wav': 'wav',
};

// Preview dialog for an ASR model: the user uploads, drops or records an audio
// clip, optionally sets a language hint, and the clip is sent to the backend
// preview endpoint; the transcript, latency and confidence are displayed.
const ASRPreviewModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  model: ASRModel | null;
}> = ({ isOpen, onClose, model }) => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [transcript, setTranscript] = useState('');
  const [latency, setLatency] = useState<number | null>(null);
  const [confidence, setConfidence] = useState<number | null>(null);
  const [language, setLanguage] = useState('');
  const [isRecording, setIsRecording] = useState(false);

  const inputRef = useRef<HTMLInputElement>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  // Bumped whenever the dialog is reset, so a preview request that is still
  // in flight can tell that its result is no longer wanted.
  const previewSeqRef = useRef(0);

  // Abort any recording in progress and release the microphone. Touches refs
  // only, so it is safe to call from effect cleanups.
  const releaseMicrophone = () => {
    const recorder = mediaRecorderRef.current;
    if (recorder && recorder.state !== 'inactive') {
      // Detach handlers first so the abandoned recording is not turned into a file.
      recorder.ondataavailable = null;
      recorder.onstop = null;
      recorder.stop();
    }
    mediaRecorderRef.current = null;
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
    }
    chunksRef.current = [];
  };

  // Reset on open, on close and when the previewed model changes. Closing the
  // dialog while recording must not leave the microphone running.
  useEffect(() => {
    releaseMicrophone();
    previewSeqRef.current += 1;
    setIsRecording(false);
    setIsTranscribing(false);
    if (!isOpen) return;
    setSelectedFile(null);
    setTranscript('');
    setLatency(null);
    setConfidence(null);
    // 'Multi-lingual' is a catalogue label, not a language code the vendor
    // would understand, so leave the hint empty for such models.
    setLanguage(model?.language && model.language !== 'Multi-lingual' ? model.language : '');
  }, [isOpen, model]);

  // Release the microphone if the component unmounts mid-recording.
  useEffect(() => {
    return () => {
      releaseMicrophone();
    };
  }, []);

  const pickFile = (file: File | null) => {
    if (!file) return;
    if (!file.type.startsWith('audio/')) {
      alert('仅支持音频文件');
      return;
    }
    setSelectedFile(file);
  };

  const handleDrop = (event: React.DragEvent<HTMLDivElement>) => {
    event.preventDefault();
    setIsDragging(false);
    const file = event.dataTransfer.files?.[0] || null;
    pickFile(file);
  };

  const startRecording = async () => {
    if (!navigator.mediaDevices?.getUserMedia) {
      alert('当前浏览器不支持麦克风录音');
      return;
    }
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Remember the stream before anything else can throw, so the catch
      // block below is able to release it.
      streamRef.current = stream;
      const recorder = new MediaRecorder(stream);
      chunksRef.current = [];
      mediaRecorderRef.current = recorder;

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };
      recorder.onstop = () => {
        const mimeType = recorder.mimeType || 'audio/webm';
        const blob = new Blob(chunksRef.current, { type: mimeType });
        // Name the file after the container actually recorded; the MIME type
        // may carry parameters such as ";codecs=opus".
        const baseType = mimeType.split(';')[0].trim().toLowerCase();
        const extension = RECORDING_EXTENSIONS[baseType] || 'webm';
        const file = new File([blob], `mic-preview-${Date.now()}.${extension}`, { type: blob.type || 'audio/webm' });
        setSelectedFile(file);
        mediaRecorderRef.current = null;
        if (streamRef.current) {
          streamRef.current.getTracks().forEach((track) => track.stop());
          streamRef.current = null;
        }
      };
      recorder.start();
      setIsRecording(true);
    } catch (error: any) {
      releaseMicrophone();
      alert(error?.message || '无法访问麦克风');
    }
  };

  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    // stop() throws on a recorder that is already inactive.
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }
    setIsRecording(false);
  };

  const runPreview = async () => {
    if (!model?.id) return;
    if (!selectedFile) {
      alert('请先上传或录制音频');
      return;
    }
    const seq = ++previewSeqRef.current;
    try {
      setIsTranscribing(true);
      const result = await previewASRModel(model.id, selectedFile, { language: language || undefined });
      // The dialog was closed or switched to another model meanwhile.
      if (previewSeqRef.current !== seq) return;
      setTranscript(result.transcript || result.message || '无识别内容');
      setLatency(result.latency_ms ?? null);
      setConfidence(result.confidence ?? null);
    } catch (error: any) {
      if (previewSeqRef.current !== seq) return;
      alert(error?.message || '识别失败');
    } finally {
      if (previewSeqRef.current === seq) {
        setIsTranscribing(false);
      }
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={`试听识别: ${model?.name || ''}`}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}></Button>
          <Button onClick={runPreview} disabled={isTranscribing || !selectedFile}>
            {isTranscribing ? '识别中...' : '开始识别'}
          </Button>
        </>
      }
    >
      <div className="space-y-4">
        <div
          className={`rounded-lg border-2 border-dashed p-4 transition-colors ${isDragging ? 'border-primary bg-primary/10' : 'border-white/10 bg-white/5'}`}
          onDragOver={(e) => {
            e.preventDefault();
            setIsDragging(true);
          }}
          onDragLeave={() => setIsDragging(false)}
          onDrop={handleDrop}
        >
          <input
            ref={inputRef}
            type="file"
            accept="audio/*"
            className="hidden"
            onChange={(e) => {
              pickFile(e.target.files?.[0] || null);
              // Clear the input so choosing the same file again still fires onChange.
              e.target.value = '';
            }}
          />
          <div className="flex flex-col items-center justify-center gap-2 text-sm text-muted-foreground">
            <Upload className="h-6 w-6 text-primary" />
            <p></p>
            <Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}></Button>
            {selectedFile && <p className="text-primary text-xs">: {selectedFile.name}</p>}
          </div>
        </div>

        <div className="flex items-center justify-between rounded-lg border border-white/10 bg-white/5 p-3">
          <div className="text-sm text-muted-foreground"></div>
          {!isRecording ? (
            <Button size="sm" variant="outline" onClick={startRecording}><Mic className="h-4 w-4 mr-1" /></Button>
          ) : (
            <Button size="sm" variant="destructive" onClick={stopRecording}><Square className="h-4 w-4 mr-1" /></Button>
          )}
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
            <Globe className="w-3 h-3 mr-1.5" /> (Optional)
          </label>
          <Input value={language} onChange={(e) => setLanguage(e.target.value)} placeholder="zh / en / auto" />
        </div>

        <div className="rounded-lg border border-primary/20 bg-primary/5 p-3 space-y-2">
          <div className="flex items-center justify-between text-xs text-primary">
            <span></span>
            <span>
              {latency !== null ? `Latency: ${latency}ms` : ''}
              {confidence !== null ? ` Confidence: ${confidence.toFixed(3)}` : ''}
            </span>
          </div>
          <textarea
            readOnly
            value={transcript}
            className="flex min-h-[120px] w-full rounded-md border-0 bg-black/20 px-3 py-2 text-sm shadow-sm text-white"
            placeholder="识别结果会显示在这里"
          />
        </div>
      </div>
    </Dialog>
  );
};

View File

@@ -1,4 +1,4 @@
import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types'; import { ASRModel, Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
import { apiRequest } from './apiClient'; import { apiRequest } from './apiClient';
type AnyRecord = Record<string, any>; type AnyRecord = Record<string, any>;
@@ -64,6 +64,20 @@ const mapVoice = (raw: AnyRecord): Voice => ({
isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)), isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
}); });
/**
 * Convert a raw ASR model record from the API into the frontend `ASRModel` shape.
 *
 * Each field is looked up through `readField` under its camelCase and/or
 * snake_case key, with the fallback used here as the third argument.
 */
const mapASRModel = (raw: AnyRecord): ASRModel => {
  // Coerced values are computed up front; the remaining fields are passed through as read.
  const id = String(readField(raw, ['id'], ''));
  const enablePunctuation = Boolean(readField(raw, ['enablePunctuation', 'enable_punctuation'], true));
  const enableNormalization = Boolean(readField(raw, ['enableNormalization', 'enable_normalization'], true));
  const enabled = Boolean(readField(raw, ['enabled'], true));

  return {
    id,
    name: readField(raw, ['name'], ''),
    vendor: readField(raw, ['vendor'], 'OpenAI Compatible'),
    language: readField(raw, ['language'], 'zh'),
    baseUrl: readField(raw, ['baseUrl', 'base_url'], ''),
    apiKey: readField(raw, ['apiKey', 'api_key'], ''),
    modelName: readField(raw, ['modelName', 'model_name'], ''),
    hotwords: readField(raw, ['hotwords'], []),
    enablePunctuation,
    enableNormalization,
    enabled,
  };
};
const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({ const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({
name: readField(raw, ['name'], ''), name: readField(raw, ['name'], ''),
type: readField(raw, ['type'], 'conversation') as 'conversation' | 'tool' | 'human' | 'end', type: readField(raw, ['type'], 'conversation') as 'conversation' | 'tool' | 'human' | 'end',
@@ -246,6 +260,97 @@ export const previewVoice = async (id: string, text: string, speed?: number, api
return response.audio_url; return response.audio_url;
}; };
/**
 * Load all ASR models from `/asr`.
 *
 * The response may be either a bare array of records or an object carrying
 * the records under `list`; both shapes are mapped through `mapASRModel`.
 */
export const fetchASRModels = async (): Promise<ASRModel[]> => {
  const payload = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/asr');
  if (Array.isArray(payload)) {
    return payload.map((entry) => mapASRModel(entry));
  }
  const records = payload.list || [];
  return records.map((entry) => mapASRModel(entry));
};
/**
 * Create an ASR model via `POST /asr` and return the mapped result.
 *
 * Missing or falsy text fields fall back to the defaults below; the boolean
 * switches default to `true` only when they are null/undefined.
 */
export const createASRModel = async (data: Partial<ASRModel>): Promise<ASRModel> => {
  const {
    id,
    name,
    vendor,
    language,
    baseUrl,
    apiKey,
    modelName,
    hotwords,
    enablePunctuation,
    enableNormalization,
    enabled,
  } = data;

  // Request body uses the snake_case keys sent to the backend.
  const body = {
    id: id || undefined,
    name: name || 'New ASR Model',
    vendor: vendor || 'OpenAI Compatible',
    language: language || 'zh',
    base_url: baseUrl || 'https://api.siliconflow.cn/v1',
    api_key: apiKey || '',
    model_name: modelName || undefined,
    hotwords: hotwords || [],
    enable_punctuation: enablePunctuation ?? true,
    enable_normalization: enableNormalization ?? true,
    enabled: enabled ?? true,
  };

  const created = await apiRequest<AnyRecord>('/asr', { method: 'POST', body });
  return mapASRModel(created);
};
/**
 * Update an existing ASR model via `PUT /asr/{id}` and return the mapped result.
 *
 * Fields are forwarded as given (no defaults applied), renamed to the
 * snake_case keys sent to the backend.
 */
export const updateASRModel = async (id: string, data: Partial<ASRModel>): Promise<ASRModel> => {
  const {
    name,
    vendor,
    language,
    baseUrl,
    apiKey,
    modelName,
    hotwords,
    enablePunctuation,
    enableNormalization,
    enabled,
  } = data;

  const body = {
    name,
    vendor,
    language,
    base_url: baseUrl,
    api_key: apiKey,
    model_name: modelName,
    hotwords,
    enable_punctuation: enablePunctuation,
    enable_normalization: enableNormalization,
    enabled,
  };

  const updated = await apiRequest<AnyRecord>(`/asr/${id}`, { method: 'PUT', body });
  return mapASRModel(updated);
};
/** Delete the ASR model with the given id via `DELETE /asr/{id}`. */
export const deleteASRModel = async (id: string): Promise<void> => {
  const endpoint = `/asr/${id}`;
  await apiRequest(endpoint, { method: 'DELETE' });
};
/**
 * JSON body returned by the ASR preview endpoint.
 *
 * Only `success` is required; every other member is optional.
 */
export type ASRPreviewResult = {
  // Outcome flag.
  success: boolean;

  // Recognition output (presumably populated on success; confirm against the backend schema).
  transcript?: string;
  language?: string;
  confidence?: number;
  latency_ms?: number;

  // Human-readable status / failure text.
  message?: string;
  error?: string;
};
/**
 * Upload an audio file to an ASR model's preview endpoint and return the parsed result.
 *
 * Sends a multipart `POST {base}/asr/{id}/preview` with the file under `file`
 * and, when provided, `language` and `api_key` form fields.
 *
 * @param id      Identifier of the ASR model; URL-encoded before being placed in the path.
 * @param file    Audio file to transcribe.
 * @param options Optional language hint and API key override.
 * @returns The parsed JSON body, or `{ success: false, error: 'Invalid preview response' }`
 *          when a successful response carries no parseable JSON.
 * @throws Error when the response status is not OK; the message is the body's
 *         `error` or `detail` string if present, otherwise `Request failed: <status>`.
 */
export const previewASRModel = async (
  id: string,
  file: File,
  options?: { language?: string; apiKey?: string }
): Promise<ASRPreviewResult> => {
  const formData = new FormData();
  formData.append('file', file);
  if (options?.language) {
    formData.append('language', options.language);
  }
  if (options?.apiKey) {
    formData.append('api_key', options.apiKey);
  }

  // Strip trailing slashes so the joined URL never contains `//`.
  const base = (import.meta.env.VITE_API_BASE_URL || 'http://127.0.0.1:8100/api').replace(/\/+$/, '');
  // Encode the id so reserved characters (`/`, `?`, `#`, spaces) cannot alter the request path.
  const url = `${base}/asr/${encodeURIComponent(id)}/preview`;

  // No Content-Type header is set here, so fetch supplies the multipart boundary itself.
  const response = await fetch(url, {
    method: 'POST',
    body: formData,
  });

  // A non-JSON body is tolerated; the status check below decides how to report it.
  let data: ASRPreviewResult | null = null;
  try {
    data = await response.json();
  } catch {
    data = null;
  }

  if (!response.ok) {
    const fallback = `Request failed: ${response.status}`;
    const detail = (data as AnyRecord | null)?.error || (data as AnyRecord | null)?.detail || fallback;
    // `detail` may be a non-string (e.g. a structured list); only surface plain strings.
    throw new Error(typeof detail === 'string' ? detail : fallback);
  }

  return data || { success: false, error: 'Invalid preview response' };
};
export const fetchWorkflows = async (): Promise<Workflow[]> => { export const fetchWorkflows = async (): Promise<Workflow[]> => {
const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows'); const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows');
const list = Array.isArray(response) ? response : (response.list || []); const list = Array.isArray(response) ? response : (response.list || []);

View File

@@ -176,8 +176,13 @@ export interface LLMModel {
export interface ASRModel { export interface ASRModel {
id: string; id: string;
name: string; name: string;
vendor: 'OpenAI Compatible'; vendor: string;
language: string; language: string;
baseUrl: string; baseUrl: string;
apiKey: string; apiKey: string;
modelName?: string;
hotwords?: string[];
enablePunctuation?: boolean;
enableNormalization?: boolean;
enabled?: boolean;
} }