Update asr library preview

2026-02-08 23:38:34 +08:00
parent 97e3236e76
commit 4bf2f788ad
5 changed files with 781 additions and 183 deletions
--- a/api/app/routers/asr.py
+++ b/api/app/routers/asr.py
@@ -1,12 +1,11 @@
-from fastapi import APIRouter, Depends, HTTPException
+import os
 from sqlalchemy.orm import Session
 from typing import List, Optional
 import uuid
 import httpx
 import time
-import base64
+import uuid
-import json
+from typing import List, Optional
-from datetime import datetime
+
 import httpx
 from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
 from sqlalchemy.orm import Session
 from ..db import get_db
 from ..models import ASRModel
@@ -17,6 +16,18 @@ from ..schemas import (
 # Router for the ASR model endpoints; routes registered on it share the
 # "/asr" prefix and are grouped under the "ASR Models" tag.
 router = APIRouter(prefix="/asr", tags=["ASR Models"])
 # Model identifier that _default_asr_model() returns for SiliconFlow vendors.
 SILICONFLOW_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
 def _is_siliconflow_vendor(vendor: str) -> bool:
    return (vendor or "").strip().lower() in {"siliconflow", "硅基流动"}
 def _default_asr_model(vendor: str) -> str:
     """Pick the fallback model identifier for *vendor*.

     SiliconFlow vendors get ``SILICONFLOW_DEFAULT_ASR_MODEL``; every other
     vendor falls back to ``"whisper-1"``.
     """
     return SILICONFLOW_DEFAULT_ASR_MODEL if _is_siliconflow_vendor(vendor) else "whisper-1"
 # ============ ASR Models CRUD ============
@router.get("")
@@ -219,3 +230,99 @@ def transcribe_audio(
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
def _vendor_error_detail(response) -> str:
    """Extract a readable error message from a non-200 vendor response.

    Handles ``{"error": {"message": ...}}``, a plain-string
    ``{"error": "..."}`` and ``{"detail": ...}``. Anything else, including a
    body that is not JSON, falls back to the raw response text.
    """
    try:
        body = response.json()
    except ValueError:
        # Body is not valid JSON; the raw text is the best we can report.
        return response.text
    if not isinstance(body, dict):
        return response.text
    error = body.get("error")
    # "error" may be an object carrying a message, or the message itself.
    message = error.get("message") if isinstance(error, dict) else error
    return str(message or body.get("detail") or response.text)


@router.post("/{id}/preview", response_model=ASRTestResponse)
async def preview_asr_model(
    id: str,
    file: UploadFile = File(...),
    language: Optional[str] = Form(None),
    api_key: Optional[str] = Form(None),
    db: Session = Depends(get_db),
):
    """Preview an ASR model: upload audio and call the OpenAI-compatible
    ``/audio/transcriptions`` endpoint configured on the model.

    Args:
        id: Identifier of the stored ASR model.
        file: Uploaded audio; its content type must start with ``audio/``.
        language: Optional language hint forwarded to the vendor.
        api_key: Optional key that overrides the one stored on the model.
        db: Database session.

    Returns:
        ASRTestResponse with the transcript, language, optional confidence
        and the elapsed time in milliseconds.

    Raises:
        HTTPException: 404 when the model does not exist; 400 for a
            non-audio or empty upload, or a missing API key / base URL;
            502 when the vendor request fails or returns a non-200 status.
    """
    # Function-scope import keeps the change local; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    model = db.query(ASRModel).filter(ASRModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="ASR Model not found")
    if not file:
        raise HTTPException(status_code=400, detail="Audio file is required")

    filename = file.filename or "preview.wav"
    content_type = file.content_type or "application/octet-stream"
    if not content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Only audio files are supported")

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    # Key precedence: form field, then the stored key, then (SiliconFlow only)
    # the SILICONFLOW_API_KEY environment variable.
    effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
    if not effective_api_key and _is_siliconflow_vendor(model.vendor):
        effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
    if not effective_api_key:
        raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")

    base_url = (model.base_url or "").strip().rstrip("/")
    if not base_url:
        raise HTTPException(status_code=400, detail=f"Base URL is required for ASR model: {model.name}")

    selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
    data = {"model": selected_model}
    effective_language = (language or "").strip() or None
    if effective_language:
        data["language"] = effective_language
    if model.hotwords:
        # Hotwords are passed to the vendor through the "prompt" field.
        data["prompt"] = " ".join(model.hotwords)

    headers = {"Authorization": f"Bearer {effective_api_key}"}
    files = {"file": (filename, audio_bytes, content_type)}

    def _send_request():
        with httpx.Client(timeout=90.0) as client:
            return client.post(
                f"{base_url}/audio/transcriptions",
                headers=headers,
                data=data,
                files=files,
            )

    # perf_counter is monotonic, so the latency cannot go negative or jump
    # if the wall clock is adjusted mid-request.
    started = time.perf_counter()
    try:
        # The httpx client is synchronous; running it directly inside this
        # coroutine would stall the event loop for up to the 90 s timeout.
        response = await run_in_threadpool(_send_request)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"ASR request failed: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"ASR vendor error: {_vendor_error_detail(response)}",
        )

    try:
        payload = response.json()
    except ValueError:
        # Some vendors answer with plain text; treat the body as the transcript.
        payload = {"text": response.text}

    transcript = ""
    response_language = model.language
    confidence = None
    if isinstance(payload, dict):
        transcript = str(payload.get("text") or payload.get("transcript") or "")
        detected = payload.get("language") or effective_language or model.language
        # Only stringify a real value; str(None) would report the language "None".
        response_language = str(detected) if detected is not None else None
        raw_confidence = payload.get("confidence")
        if raw_confidence is not None:
            try:
                confidence = float(raw_confidence)
            except (TypeError, ValueError):
                confidence = None

    latency_ms = int((time.perf_counter() - started) * 1000)
    return ASRTestResponse(
        success=bool(transcript),
        transcript=transcript,
        language=response_language,
        confidence=confidence,
        latency_ms=latency_ms,
        message=None if transcript else "No transcript in response",
    )
--- a/api/tests/test_asr.py
+++ b/api/tests/test_asr.py
@@ -287,3 +287,61 @@ class TestASRModelAPI:
            response = client.post("/api/asr", json=data)
            assert response.status_code == 200
            assert response.json()["vendor"] == vendor
    def test_preview_asr_model_success(self, client, sample_asr_model_data, monkeypatch):
        """Preview endpoint forwards the upload and returns the vendor transcript.

        ``httpx.Client`` on the router module is replaced by a stub, so no
        network call is made; the stub also checks the outgoing request.
        """
        from app.routers import asr as asr_router

        model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]
        expected_auth = f"Bearer {sample_asr_model_data['api_key']}"
        expected_model = sample_asr_model_data["model_name"]

        class DummyResponse:
            status_code = 200
            text = '{"text":"ok"}'

            def json(self):
                return {"text": "你好，这是测试转写", "language": "zh", "confidence": 0.98}

        class DummyClient:
            """Minimal stand-in for ``httpx.Client`` used as a context manager."""

            def __init__(self, *args, **kwargs):
                pass

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def post(self, url, headers=None, data=None, files=None):
                uploaded_name = files["file"][0]
                assert url.endswith("/audio/transcriptions")
                assert headers["Authorization"] == expected_auth
                assert data["model"] == expected_model
                assert uploaded_name == "sample.wav"
                return DummyResponse()

        monkeypatch.setattr(asr_router.httpx, "Client", DummyClient)

        upload = {"file": ("sample.wav", b"fake-wav-bytes", "audio/wav")}
        response = client.post(f"/api/asr/{model_id}/preview", files=upload)

        assert response.status_code == 200
        body = response.json()
        assert body["success"] is True
        assert body["transcript"] == "你好，这是测试转写"
        assert body["language"] == "zh"
    def test_preview_asr_model_reject_non_audio(self, client, sample_asr_model_data):
        """Preview endpoint answers 400 when the upload is not an audio file."""
        created = client.post("/api/asr", json=sample_asr_model_data)
        preview_url = f"/api/asr/{created.json()['id']}/preview"
        upload = {"file": ("sample.txt", b"text-data", "text/plain")}

        response = client.post(preview_url, files=upload)

        assert response.status_code == 400
        assert "Only audio files are supported" in response.text
--- a/web/pages/ASRLibrary.tsx
+++ b/web/pages/ASRLibrary.tsx
@@ -1,103 +1,122 @@
-
+import React, { useEffect, useRef, useState } from 'react';
-import React, { useState } from 'react';
+import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react';
 import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages } from 'lucide-react';
 import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
 import { mockASRModels } from '../services/mockData';
 import { ASRModel } from '../types';
 import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi';
 /**
  * Mask an API key for display. Keys that are missing or shorter than
  * 8 characters are fully masked; otherwise the first 3 and last 4
  * characters are kept around a fixed "****".
  */
 const maskApiKey = (key?: string) => {
  const fullyMasked = '********';
  if (!key || key.length < 8) {
    return fullyMasked;
  }
  const head = key.slice(0, 3);
  const tail = key.slice(-4);
  return `${head}****${tail}`;
 };
 /**
  * Split a free-text hotword field on commas and newlines, trimming each
  * entry and dropping the empty ones.
  */
 const parseHotwords = (value: string): string[] => {
  const words: string[] = [];
  for (const piece of value.split(/[\n,]/)) {
    const word = piece.trim();
    if (word) {
      words.push(word);
    }
  }
  return words;
 };
 /** Render a hotword list as a comma-separated string; a missing list yields ''. */
 const toHotwordsValue = (hotwords?: string[]): string => {
  return hotwords ? hotwords.join(', ') : '';
 };
 export const ASRLibraryPage: React.FC = () => {
-  const [models, setModels] = useState<ASRModel[]>(mockASRModels);
+  const [models, setModels] = useState<ASRModel[]>([]);
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('all');
  const [langFilter, setLangFilter] = useState<string>('all');
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
  const [previewingModel, setPreviewingModel] = useState<ASRModel | null>(null);
  const [isLoading, setIsLoading] = useState(true);
-  // Form State
+  const loadModels = async () => {
-  const [newModel, setNewModel] = useState<Partial<ASRModel>>({
+    setIsLoading(true);
-    vendor: 'OpenAI Compatible',
+    try {
-    language: 'zh'
+      setModels(await fetchASRModels());
-  });
+    } catch (error) {
      console.error(error);
      setModels([]);
    } finally {
      setIsLoading(false);
    }
  };
-  const filteredModels = models.filter(m => {
+  useEffect(() => {
-    const matchesSearch = m.name.toLowerCase().includes(searchTerm.toLowerCase());
+    loadModels();
  }, []);
  const filteredModels = models.filter((m) => {
    const q = searchTerm.toLowerCase();
    const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q);
    const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter;
    const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual');
    return matchesSearch && matchesVendor && matchesLang;
  });
-  const handleAddModel = () => {
+  const handleCreate = async (data: Partial<ASRModel>) => {
-    if (!newModel.name || !newModel.baseUrl || !newModel.apiKey) {
+    const created = await createASRModel(data);
-      alert("请填写完整信息");
+    setModels((prev) => [created, ...prev]);
      return;
    }
    const model: ASRModel = {
      id: `asr_${Date.now()}`,
      name: newModel.name,
      vendor: newModel.vendor as 'OpenAI Compatible',
      language: newModel.language || 'zh',
      baseUrl: newModel.baseUrl,
      apiKey: newModel.apiKey
    };
    setModels([model, ...models]);
    setIsAddModalOpen(false);
    setNewModel({ vendor: 'OpenAI Compatible', language: 'zh', name: '', baseUrl: '', apiKey: '' });
  };
-  const handleDeleteModel = (id: string) => {
+  const handleUpdate = async (id: string, data: Partial<ASRModel>) => {
-    if (confirm('确认删除该语音识别模型吗？')) {
+    const updated = await updateASRModel(id, data);
-      setModels(prev => prev.filter(m => m.id !== id));
+    setModels((prev) => prev.map((m) => (m.id === id ? updated : m)));
-    }
+    setEditingModel(null);
  };
-  const maskApiKey = (key: string) => {
+  const handleDelete = async (id: string) => {
-    if (!key || key.length < 8) return '********';
+    if (!confirm('确认删除该语音识别模型吗？')) return;
-    return `${key.substring(0, 3)}****${key.substring(key.length - 4)}`;
+    await deleteASRModel(id);
    setModels((prev) => prev.filter((m) => m.id !== id));
  };
  const vendorOptions = Array.from(new Set(models.map((m) => m.vendor).filter(Boolean)));
  return (
    <div className="space-y-6 animate-in fade-in py-4 pb-10">
      <div className="flex items-center justify-between">
        <h1 className="text-2xl font-bold tracking-tight text-white">语音识别</h1>
        <Button onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
-            <Plus className="mr-2 h-4 w-4" /> 添加模型
+          <Plus className="mr-2 h-4 w-4" /> 添加模型
        </Button>
      </div>
      <div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
-         <div className="relative col-span-1 md:col-span-2">
+        <div className="relative col-span-1 md:col-span-2">
-             <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
+          <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
-             <Input 
+          <Input
-                placeholder="搜索模型名称..." 
+            placeholder="搜索模型名称/Model Name..."
-                className="pl-9 border-0 bg-white/5" 
+            className="pl-9 border-0 bg-white/5"
-                value={searchTerm} 
+            value={searchTerm}
-                onChange={e => setSearchTerm(e.target.value)}
+            onChange={(e) => setSearchTerm(e.target.value)}
-             />
+          />
-         </div>
+        </div>
-         <div className="flex items-center space-x-2">
+        <div className="flex items-center space-x-2">
-            <Filter className="h-4 w-4 text-muted-foreground" />
+          <Filter className="h-4 w-4 text-muted-foreground" />
-            <select 
+          <select
-                className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
+            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
-                value={vendorFilter}
+            value={vendorFilter}
-                onChange={(e) => setVendorFilter(e.target.value)}
+            onChange={(e) => setVendorFilter(e.target.value)}
-            >
+          >
-                <option value="all">所有接口类型</option>
+            <option value="all">所有厂商</option>
-                <option value="OpenAI Compatible">OpenAI Compatible</option>
+            {vendorOptions.map((vendor) => (
-            </select>
+              <option key={vendor} value={vendor}>{vendor}</option>
-         </div>
+            ))}
-         <div className="flex items-center space-x-2">
+          </select>
-            <select 
+        </div>
-                className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
+        <div className="flex items-center space-x-2">
-                value={langFilter}
+          <select
-                onChange={(e) => setLangFilter(e.target.value)}
+            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
-            >
+            value={langFilter}
-                <option value="all">所有语言</option>
+            onChange={(e) => setLangFilter(e.target.value)}
-                <option value="zh">中文 (Chinese)</option>
+          >
-                <option value="en">英文 (English)</option>
+            <option value="all">所有语言</option>
-                <option value="Multi-lingual">多语言 (Multi-lingual)</option>
+            <option value="zh">中文 (Chinese)</option>
-            </select>
+            <option value="en">英文 (English)</option>
-         </div>
+            <option value="Multi-lingual">多语言 (Multi-lingual)</option>
          </select>
        </div>
      </div>
      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
@@ -105,131 +124,435 @@ export const ASRLibraryPage: React.FC = () => {
          <TableHeader>
            <TableRow>
              <TableHead>模型名称</TableHead>
-              <TableHead>接口类型</TableHead>
+              <TableHead>厂商</TableHead>
              <TableHead>语言</TableHead>
              <TableHead>模型标识</TableHead>
              <TableHead>Base URL</TableHead>
              <TableHead>API Key</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
-            {filteredModels.map(model => (
+            {!isLoading && filteredModels.map((model) => (
              <TableRow key={model.id}>
-                <TableCell className="font-medium text-white flex items-center">
+                <TableCell className="font-medium text-white">
-                    <Ear className="w-4 h-4 mr-2 text-primary" />
+                  <div className="flex flex-col">
-                    {model.name}
+                    <span className="flex items-center">
-                </TableCell>
+                      <Ear className="w-4 h-4 mr-2 text-primary" />
-                <TableCell>
+                      {model.name}
-                    <Badge variant="outline">{model.vendor}</Badge>
+                    </span>
-                </TableCell>
+                    {model.hotwords && model.hotwords.length > 0 && (
-                <TableCell>
+                      <span className="text-xs text-muted-foreground">热词: {model.hotwords.join(', ')}</span>
-                    <Badge variant="default" className="bg-purple-500/10 text-purple-400 border-purple-500/20">
+                    )}
-                        {model.language}
+                  </div>
                    </Badge>
                </TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">
                    {model.baseUrl}
                </TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">
                    {maskApiKey(model.apiKey)}
                </TableCell>
                <TableCell><Badge variant="outline">{model.vendor}</Badge></TableCell>
                <TableCell>{model.language}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{model.modelName || '-'}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground max-w-[220px] truncate">{model.baseUrl}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
                <TableCell className="text-right">
-                    <Button 
+                  <Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)}>
-                        variant="ghost" 
+                    <Ear className="h-4 w-4" />
-                        size="icon" 
+                  </Button>
-                        onClick={() => handleDeleteModel(model.id)}
+                  <Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
-                        className="text-muted-foreground hover:text-destructive transition-colors"
+                    <Pencil className="h-4 w-4" />
-                    >
+                  </Button>
-                        <Trash2 className="h-4 w-4" />
+                  <Button variant="ghost" size="icon" onClick={() => handleDelete(model.id)} className="text-red-400">
-                    </Button>
+                    <Trash2 className="h-4 w-4" />
                  </Button>
                </TableCell>
              </TableRow>
            ))}
-             {filteredModels.length === 0 && (
+            {!isLoading && filteredModels.length === 0 && (
-                 <TableRow>
+              <TableRow>
-                     <TableCell colSpan={6} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
+                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
-                 </TableRow>
+              </TableRow>
-             )}
+            )}
            {isLoading && (
              <TableRow>
                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">加载中...</TableCell>
              </TableRow>
            )}
          </tbody>
        </table>
      </div>
-      <Dialog
+      <ASRModelModal
        isOpen={isAddModalOpen}
        onClose={() => setIsAddModalOpen(false)}
-        title="添加语音识别模型"
+        onSubmit={handleCreate}
-        footer={
+      />
          <>
            <Button variant="ghost" onClick={() => setIsAddModalOpen(false)}>取消</Button>
            <Button onClick={handleAddModel}>确认添加</Button>
          </>
        }
      >
        <div className="space-y-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">接口类型 (Interface Type)</label>
            <select 
                className="flex h-10 w-full rounded-md border border-white/10 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground appearance-none cursor-pointer [&>option]:bg-card"
                value={newModel.vendor}
                onChange={e => setNewModel({...newModel, vendor: e.target.value as any})}
            >
                <option value="OpenAI Compatible">OpenAI Compatible</option>
            </select>
          </div>
-          <div className="space-y-1.5">
+      <ASRModelModal
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">支持语言 (Language)</label>
+        isOpen={!!editingModel}
-            <div className="flex bg-white/5 p-1 rounded-lg border border-white/10">
+        onClose={() => setEditingModel(null)}
-                {(['zh', 'en', 'Multi-lingual'] as const).map(l => (
+        onSubmit={(data) => handleUpdate(editingModel!.id, data)}
-                    <button 
+        initialModel={editingModel || undefined}
-                        key={l}
+      />
                        onClick={() => setNewModel({...newModel, language: l})}
                        className={`flex-1 flex items-center justify-center py-1.5 text-xs font-bold rounded-md transition-all ${newModel.language === l ? 'bg-primary text-primary-foreground shadow-lg' : 'text-muted-foreground hover:text-foreground'}`}
                    >
                        {l === 'zh' && <span className="mr-1">🇨🇳</span>}
                        {l === 'en' && <span className="mr-1">🇺🇸</span>}
                        {l === 'Multi-lingual' && <Globe className="w-3 h-3 mr-1.5" />}
                        {l === 'zh' ? '中文' : l === 'en' ? '英文' : '多语言'}
                    </button>
                ))}
            </div>
          </div>
-          <div className="space-y-1.5">
+      <ASRPreviewModal
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">模型名称 (Model Name)</label>
+        isOpen={!!previewingModel}
-            <Input 
+        onClose={() => setPreviewingModel(null)}
-                value={newModel.name} 
+        model={previewingModel}
-                onChange={e => setNewModel({...newModel, name: e.target.value})} 
+      />
                placeholder="例如: whisper-1, funasr" 
            />
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
                <Server className="w-3 h-3 mr-1.5" /> Base URL
            </label>
            <Input 
                value={newModel.baseUrl} 
                onChange={e => setNewModel({...newModel, baseUrl: e.target.value})} 
                placeholder="https://api.openai.com/v1" 
                className="font-mono text-xs"
            />
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
                <Key className="w-3 h-3 mr-1.5" /> API Key
            </label>
            <Input 
                type="password"
                value={newModel.apiKey} 
                onChange={e => setNewModel({...newModel, apiKey: e.target.value})} 
                placeholder="sk-..." 
                className="font-mono text-xs"
            />
          </div>
        </div>
      </Dialog>
    </div>
  );
 };
 /**
  * Dialog used both to add a new ASR model and to edit an existing one.
  *
  * With `initialModel` the form is pre-filled from it and the dialog uses the
  * "edit" wording; without it the form starts from the built-in defaults. The
  * form is re-initialised whenever the dialog opens or `initialModel` changes.
  * Submitting requires a name, Base URL and API key, trims the text fields and
  * passes the result to `onSubmit`; a rejected `onSubmit` is shown via `alert`.
  */
 const ASRModelModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSubmit: (model: Partial<ASRModel>) => Promise<void>;
  initialModel?: ASRModel;
 }> = ({ isOpen, onClose, onSubmit, initialModel }) => {
  // Shared class strings, named once so the markup below stays readable.
  const labelClass = 'text-[10px] font-black text-muted-foreground uppercase tracking-widest block';
  const iconLabelClass = `${labelClass} flex items-center`;
  const selectClass = 'flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card';
  const toggleClass = 'flex items-center space-x-2 text-xs text-muted-foreground';

  // Values used for a blank form and as fallbacks for missing model fields.
  const defaults = {
    vendor: 'OpenAI Compatible',
    language: 'zh',
    modelName: 'FunAudioLLM/SenseVoiceSmall',
    baseUrl: 'https://api.siliconflow.cn/v1',
  };

  const [name, setName] = useState('');
  const [vendor, setVendor] = useState(defaults.vendor);
  const [language, setLanguage] = useState(defaults.language);
  const [modelName, setModelName] = useState(defaults.modelName);
  const [baseUrl, setBaseUrl] = useState(defaults.baseUrl);
  const [apiKey, setApiKey] = useState('');
  const [hotwords, setHotwords] = useState('');
  const [enablePunctuation, setEnablePunctuation] = useState(true);
  const [enableNormalization, setEnableNormalization] = useState(true);
  const [enabled, setEnabled] = useState(true);
  const [saving, setSaving] = useState(false);

  useEffect(() => {
    if (isOpen) {
      // Optional chaining covers both modes: without initialModel every
      // lookup is undefined and the default on the right-hand side wins.
      setName(initialModel?.name || '');
      setVendor(initialModel?.vendor || defaults.vendor);
      setLanguage(initialModel?.language || defaults.language);
      setModelName(initialModel?.modelName || defaults.modelName);
      setBaseUrl(initialModel?.baseUrl || defaults.baseUrl);
      setApiKey(initialModel?.apiKey || '');
      setHotwords(toHotwordsValue(initialModel?.hotwords));
      setEnablePunctuation(initialModel?.enablePunctuation ?? true);
      setEnableNormalization(initialModel?.enableNormalization ?? true);
      setEnabled(initialModel?.enabled ?? true);
    }
  }, [initialModel, isOpen]);

  const handleSubmit = async () => {
    // Required fields in the order they are reported to the user.
    const requiredFields: Array<[string, string]> = [
      [name, '请填写模型名称'],
      [baseUrl, '请填写 Base URL'],
      [apiKey, '请填写 API Key'],
    ];
    const firstMissing = requiredFields.find(([value]) => !value.trim());
    if (firstMissing) {
      alert(firstMissing[1]);
      return;
    }

    const payload: Partial<ASRModel> = {
      name: name.trim(),
      vendor: vendor.trim(),
      language,
      modelName: modelName.trim(),
      baseUrl: baseUrl.trim(),
      apiKey: apiKey.trim(),
      hotwords: parseHotwords(hotwords),
      enablePunctuation,
      enableNormalization,
      enabled,
    };

    setSaving(true);
    try {
      await onSubmit(payload);
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setSaving(false);
    }
  };

  const isEditing = Boolean(initialModel);
  const dialogTitle = isEditing ? '编辑语音识别模型' : '添加语音识别模型';
  const idleSubmitLabel = isEditing ? '保存修改' : '确认添加';
  const submitLabel = saving ? '保存中...' : idleSubmitLabel;

  const vendorChoices = ['OpenAI Compatible', 'SiliconFlow'];
  const languageChoices: Array<[string, string]> = [
    ['zh', '中文 (Chinese)'],
    ['en', '英文 (English)'],
    ['Multi-lingual', '多语言 (Multi-lingual)'],
  ];
  const toggles: Array<{ label: string; checked: boolean; onToggle: (next: boolean) => void }> = [
    { label: '标点增强', checked: enablePunctuation, onToggle: setEnablePunctuation },
    { label: '文本归一化', checked: enableNormalization, onToggle: setEnableNormalization },
    { label: '启用', checked: enabled, onToggle: setEnabled },
  ];

  const footer = (
    <>
      <Button variant="ghost" onClick={onClose}>取消</Button>
      <Button onClick={handleSubmit} disabled={saving}>{submitLabel}</Button>
    </>
  );

  return (
    <Dialog isOpen={isOpen} onClose={onClose} title={dialogTitle} footer={footer}>
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className={labelClass}>模型名称</label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: SenseVoice CN" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={labelClass}>接口类型</label>
            <select className={selectClass} value={vendor} onChange={(e) => setVendor(e.target.value)}>
              {vendorChoices.map((choice) => (
                <option key={choice} value={choice}>{choice}</option>
              ))}
            </select>
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Languages className="w-3 h-3 mr-1.5" />语言</label>
            <select className={selectClass} value={language} onChange={(e) => setLanguage(e.target.value)}>
              {languageChoices.map(([code, text]) => (
                <option key={code} value={code}>{text}</option>
              ))}
            </select>
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>Model Name</label>
          <Input value={modelName} onChange={(e) => setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Server className="w-3 h-3 mr-1.5" />Base URL</label>
            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Key className="w-3 h-3 mr-1.5" />API Key</label>
            <Input value={apiKey} onChange={(e) => setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>热词 (comma separated)</label>
          <Input value={hotwords} onChange={(e) => setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-3 gap-2">
          {toggles.map(({ label, checked, onToggle }) => (
            <label key={label} className={toggleClass}>
              <input type="checkbox" checked={checked} onChange={(e) => onToggle(e.target.checked)} />
              <span>{label}</span>
            </label>
          ))}
        </div>
      </div>
    </Dialog>
  );
 };
 /**
  * Dialog for trying out an ASR model: the user drops/selects an audio file or
  * records from the microphone, then submits it through `previewASRModel`.
  *
  * Props:
  * - isOpen: whether the dialog is shown; each open resets all local state.
  * - onClose: passed to the Dialog and bound to the "关闭" button.
  * - model: the ASR model under test; its `language` seeds the language field.
  */
 const ASRPreviewModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  model: ASRModel | null;
 }> = ({ isOpen, onClose, model }) => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [transcript, setTranscript] = useState('');
  const [latency, setLatency] = useState<number | null>(null);
  const [confidence, setConfidence] = useState<number | null>(null);
  const [language, setLanguage] = useState('');
  const [isRecording, setIsRecording] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<Blob[]>([]);

  // Abort any in-flight recording and release the microphone. Only refs are
  // touched, so this is safe to call from effect cleanups.
  const releaseMicrophone = () => {
    const recorder = mediaRecorderRef.current;
    if (recorder) {
      // Detach handlers first so a discarded recording is not turned into the
      // selected file after the dialog has already been closed/reset.
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== 'inactive') {
        recorder.stop();
      }
      mediaRecorderRef.current = null;
    }
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
    }
    chunksRef.current = [];
  };

  // Reset the form every time the dialog opens (or the model changes while open).
  useEffect(() => {
    if (!isOpen) return undefined;
    setSelectedFile(null);
    setTranscript('');
    setLatency(null);
    setConfidence(null);
    setLanguage(model?.language || '');
    setIsTranscribing(false);
    setIsRecording(false);
    // Cleanup runs when the dialog closes, the model changes, or the component
    // unmounts; without it the microphone stayed open after closing mid-recording.
    return releaseMicrophone;
  }, [isOpen, model]);

  // Accept a candidate file only if the browser reports an audio MIME type.
  const pickFile = (file: File | null) => {
    if (!file) return;
    if (!file.type.startsWith('audio/')) {
      alert('仅支持音频文件');
      return;
    }
    setSelectedFile(file);
  };

  const handleDrop = (event: React.DragEvent<HTMLDivElement>) => {
    event.preventDefault();
    setIsDragging(false);
    const file = event.dataTransfer.files?.[0] || null;
    pickFile(file);
  };

  // Ask for microphone access and start buffering recorded chunks.
  const startRecording = async () => {
    if (!navigator.mediaDevices?.getUserMedia) {
      alert('当前浏览器不支持麦克风录音');
      return;
    }
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const recorder = new MediaRecorder(stream);
      chunksRef.current = [];
      streamRef.current = stream;
      mediaRecorderRef.current = recorder;
      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };
      // On a normal stop, wrap the buffered chunks into a File and free the mic.
      recorder.onstop = () => {
        const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' });
        const file = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
        setSelectedFile(file);
        if (streamRef.current) {
          streamRef.current.getTracks().forEach((track) => track.stop());
          streamRef.current = null;
        }
      };
      recorder.start();
      setIsRecording(true);
    } catch (error: any) {
      alert(error?.message || '无法访问麦克风');
    }
  };

  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    // MediaRecorder.stop() throws InvalidStateError on an inactive recorder.
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }
    setIsRecording(false);
  };

  // Send the selected audio to the backend and display the outcome.
  const runPreview = async () => {
    if (!model?.id) return;
    if (!selectedFile) {
      alert('请先上传或录制音频');
      return;
    }
    try {
      setIsTranscribing(true);
      const result = await previewASRModel(model.id, selectedFile, { language: language || undefined });
      // Fall back to the backend's message/error before the generic placeholder
      // so a failed preview is not reported as "no content".
      setTranscript(result.transcript || result.message || result.error || '无识别内容');
      setLatency(result.latency_ms ?? null);
      setConfidence(result.confidence ?? null);
    } catch (error: any) {
      alert(error?.message || '识别失败');
    } finally {
      setIsTranscribing(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={`试听识别: ${model?.name || ''}`}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>关闭</Button>
          <Button onClick={runPreview} disabled={isTranscribing || !selectedFile}>
            {isTranscribing ? '识别中...' : '开始识别'}
          </Button>
        </>
      }
    >
      <div className="space-y-4">
        <div
          className={`rounded-lg border-2 border-dashed p-4 transition-colors ${isDragging ? 'border-primary bg-primary/10' : 'border-white/10 bg-white/5'}`}
          onDragOver={(e) => {
            e.preventDefault();
            setIsDragging(true);
          }}
          onDragLeave={() => setIsDragging(false)}
          onDrop={handleDrop}
        >
          <input
            ref={inputRef}
            type="file"
            accept="audio/*"
            className="hidden"
            onChange={(e) => {
              pickFile(e.target.files?.[0] || null);
              // Clear the input so choosing the same file again (e.g. after the
              // dialog was reset) still fires a change event.
              e.target.value = '';
            }}
          />
          <div className="flex flex-col items-center justify-center gap-2 text-sm text-muted-foreground">
            <Upload className="h-6 w-6 text-primary" />
            <p>拖拽音频文件到这里，或</p>
            <Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}>选择文件</Button>
            {selectedFile && <p className="text-primary text-xs">已选择: {selectedFile.name}</p>}
          </div>
        </div>
        <div className="flex items-center justify-between rounded-lg border border-white/10 bg-white/5 p-3">
          <div className="text-sm text-muted-foreground">麦克风测试</div>
          {!isRecording ? (
            <Button size="sm" variant="outline" onClick={startRecording}><Mic className="h-4 w-4 mr-1" />开始录音</Button>
          ) : (
            <Button size="sm" variant="destructive" onClick={stopRecording}><Square className="h-4 w-4 mr-1" />停止录音</Button>
          )}
        </div>
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
            <Globe className="w-3 h-3 mr-1.5" />识别语言 (Optional)
          </label>
          <Input value={language} onChange={(e) => setLanguage(e.target.value)} placeholder="zh / en / auto" />
        </div>
        <div className="rounded-lg border border-primary/20 bg-primary/5 p-3 space-y-2">
          <div className="flex items-center justify-between text-xs text-primary">
            <span>识别结果</span>
            <span>
              {latency !== null ? `Latency: ${latency}ms` : ''}
              {confidence !== null ? `  Confidence: ${confidence.toFixed(3)}` : ''}
            </span>
          </div>
          <textarea
            readOnly
            value={transcript}
            className="flex min-h-[120px] w-full rounded-md border-0 bg-black/20 px-3 py-2 text-sm shadow-sm text-white"
            placeholder="识别结果会显示在这里"
          />
        </div>
      </div>
    </Dialog>
  );
 };
--- a/web/services/backendApi.ts
+++ b/web/services/backendApi.ts
@@ -1,4 +1,4 @@
-import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
+import { ASRModel, Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
 import { apiRequest } from './apiClient';
 // Loosely-typed bag used for raw backend JSON before it is mapped to a typed model.
 type AnyRecord = Record<string, any>;
@@ -64,6 +64,20 @@ const mapVoice = (raw: AnyRecord): Voice => ({
  isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
 });
 /**
  * Converts a raw ASR model record into the frontend `ASRModel` shape.
  * Each field is read via `readField` under its camelCase key and, where
  * listed, its snake_case alias, with the given fallback as the last argument.
  */
 const mapASRModel = (raw: AnyRecord): ASRModel => {
  const id = String(readField(raw, ['id'], ''));
  const name = readField(raw, ['name'], '');
  const vendor = readField(raw, ['vendor'], 'OpenAI Compatible');
  const language = readField(raw, ['language'], 'zh');
  const baseUrl = readField(raw, ['baseUrl', 'base_url'], '');
  const apiKey = readField(raw, ['apiKey', 'api_key'], '');
  const modelName = readField(raw, ['modelName', 'model_name'], '');
  const hotwords = readField(raw, ['hotwords'], []);
  // Flags are coerced so the mapped model always carries real booleans.
  const enablePunctuation = Boolean(readField(raw, ['enablePunctuation', 'enable_punctuation'], true));
  const enableNormalization = Boolean(readField(raw, ['enableNormalization', 'enable_normalization'], true));
  const enabled = Boolean(readField(raw, ['enabled'], true));

  const mapped: ASRModel = {
    id,
    name,
    vendor,
    language,
    baseUrl,
    apiKey,
    modelName,
    hotwords,
    enablePunctuation,
    enableNormalization,
    enabled,
  };
  return mapped;
 };
 const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({
  name: readField(raw, ['name'], ''),
  type: readField(raw, ['type'], 'conversation') as 'conversation' | 'tool' | 'human' | 'end',
@@ -246,6 +260,97 @@ export const previewVoice = async (id: string, text: string, speed?: number, api
  return response.audio_url;
 };
 /**
  * Loads all ASR models from `/asr`.
  * The response may be a bare array or an object with an optional `list` array.
  */
 export const fetchASRModels = async (): Promise<ASRModel[]> => {
  const payload = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/asr');
  let records: AnyRecord[];
  if (Array.isArray(payload)) {
    records = payload;
  } else {
    records = payload.list || [];
  }
  return records.map((record) => mapASRModel(record));
 };
 /**
  * Creates an ASR model via `POST /asr` and returns the mapped result.
  * Missing fields are filled with defaults and sent under snake_case keys.
  */
 export const createASRModel = async (data: Partial<ASRModel>): Promise<ASRModel> => {
  const {
    id,
    name,
    vendor,
    language,
    baseUrl,
    apiKey,
    modelName,
    hotwords,
    enablePunctuation,
    enableNormalization,
    enabled,
  } = data;

  const body = {
    // Empty ids / model names are sent as undefined rather than ''.
    id: id || undefined,
    name: name || 'New ASR Model',
    vendor: vendor || 'OpenAI Compatible',
    language: language || 'zh',
    base_url: baseUrl || 'https://api.siliconflow.cn/v1',
    api_key: apiKey || '',
    model_name: modelName || undefined,
    hotwords: hotwords || [],
    // `??` keeps an explicit `false`; only null/undefined default to true.
    enable_punctuation: enablePunctuation ?? true,
    enable_normalization: enableNormalization ?? true,
    enabled: enabled ?? true,
  };

  const created = await apiRequest<AnyRecord>('/asr', { method: 'POST', body });
  return mapASRModel(created);
 };
 /**
  * Updates an ASR model via `PUT /asr/{id}` and returns the mapped result.
  * Fields are forwarded as-is under snake_case keys; no defaults are applied.
  */
 export const updateASRModel = async (id: string, data: Partial<ASRModel>): Promise<ASRModel> => {
  const {
    name,
    vendor,
    language,
    baseUrl,
    apiKey,
    modelName,
    hotwords,
    enablePunctuation,
    enableNormalization,
    enabled,
  } = data;

  const body = {
    name,
    vendor,
    language,
    base_url: baseUrl,
    api_key: apiKey,
    model_name: modelName,
    hotwords,
    enable_punctuation: enablePunctuation,
    enable_normalization: enableNormalization,
    enabled,
  };

  const endpoint = `/asr/${id}`;
  const updated = await apiRequest<AnyRecord>(endpoint, { method: 'PUT', body });
  return mapASRModel(updated);
 };
 /** Deletes the ASR model with the given id via `DELETE /asr/{id}`. */
 export const deleteASRModel = async (id: string): Promise<void> => {
  const endpoint = `/asr/${id}`;
  await apiRequest(endpoint, { method: 'DELETE' });
 };
 // Result returned by `previewASRModel`; every field except `success` is optional.
 export type ASRPreviewResult = {
  // Set to false by `previewASRModel` when the response body is not valid JSON.
  success: boolean;
  // Recognized text; shown first in the preview dialog when present.
  transcript?: string;
  // NOTE(review): presumably the language reported by the backend — confirm against ASRTestResponse.
  language?: string;
  // Rendered with three decimals in the preview dialog.
  confidence?: number;
  // Rendered as "Latency: <n>ms" in the preview dialog.
  latency_ms?: number;
  // Used as the displayed text when `transcript` is empty.
  message?: string;
  // Error description; also filled locally for an unparseable response.
  error?: string;
 };
 /**
  * Uploads an audio file to `POST /asr/{id}/preview` as multipart form data and
  * returns the parsed preview result.
  *
  * @param id      ASR model id used in the request path.
  * @param file    Audio file sent under the `file` form field.
  * @param options Optional `language` and `apiKey` (sent as `api_key`); empty
  *                values are omitted from the form.
  * @returns The parsed JSON body, or `{ success: false, error }` when a
  *          successful response carries no parseable JSON.
  * @throws Error when the HTTP status is not ok; the message comes from the
  *         body's `error` or `detail`, falling back to the status code.
  */
 export const previewASRModel = async (
  id: string,
  file: File,
  options?: { language?: string; apiKey?: string }
 ): Promise<ASRPreviewResult> => {
  const formData = new FormData();
  formData.append('file', file);
  if (options?.language) {
    formData.append('language', options.language);
  }
  if (options?.apiKey) {
    formData.append('api_key', options.apiKey);
  }
  // Uses fetch directly; trailing slashes are trimmed so the path joins cleanly.
  const base = (import.meta.env.VITE_API_BASE_URL || 'http://127.0.0.1:8100/api').replace(/\/+$/, '');
  const url = `${base}/asr/${id}/preview`;
  const response = await fetch(url, {
    method: 'POST',
    body: formData,
  });
  // The body may be empty or non-JSON (e.g. a proxy error page); tolerate that.
  let data: ASRPreviewResult | null = null;
  try {
    data = await response.json();
  } catch {
    data = null;
  }
  if (!response.ok) {
    const fallback = `Request failed: ${response.status}`;
    const detail = (data as AnyRecord | null)?.error || (data as AnyRecord | null)?.detail || fallback;
    // A structured `detail` (e.g. a list of validation errors) used to be
    // dropped; serialize it so the caller can see why the request failed.
    const message = typeof detail === 'string' ? detail : `${fallback}: ${JSON.stringify(detail)}`;
    throw new Error(message);
  }
  return data || { success: false, error: 'Invalid preview response' };
 };
 export const fetchWorkflows = async (): Promise<Workflow[]> => {
  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows');
  const list = Array.isArray(response) ? response : (response.list || []);
--- a/web/types.ts
+++ b/web/types.ts
@@ -176,8 +176,13 @@ export interface LLMModel {
 // Frontend shape of an ASR model; `vendor` is widened from the single literal 'OpenAI Compatible' to any string.
 export interface ASRModel {
  id: string;
  name: string;
-  vendor: 'OpenAI Compatible';
+  vendor: string;
  language: string;
  baseUrl: string;
  apiKey: string;
  modelName?: string;
  hotwords?: string[];
  enablePunctuation?: boolean;
  enableNormalization?: boolean;
  enabled?: boolean;
 }