Update asr library preview

2026-02-08 23:38:34 +08:00
parent 97e3236e76
commit 4bf2f788ad
5 changed files with 781 additions and 183 deletions
--- a/api/app/routers/asr.py
+++ b/api/app/routers/asr.py
@@ -1,12 +1,11 @@
-from fastapi import APIRouter, Depends, HTTPException
-from sqlalchemy.orm import Session
-from typing import List, Optional
-import uuid
-import httpx
+import os
 import time
-import base64
-import json
-from datetime import datetime
+import uuid
+from typing import List, Optional
+
+import httpx
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
+from sqlalchemy.orm import Session

 from ..db import get_db
 from ..models import ASRModel
@@ -17,6 +16,18 @@ from ..schemas import (

 router = APIRouter(prefix="/asr", tags=["ASR Models"])

+SILICONFLOW_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
+
+
+def _is_siliconflow_vendor(vendor: str) -> bool:
+    """Tell whether ``vendor`` names SiliconFlow (case- and padding-insensitive)."""
+    return (vendor or "").strip().lower() in ("siliconflow", "硅基流动")
+
+
+def _default_asr_model(vendor: str) -> str:
+    """Return the vendor's fallback model id (``whisper-1`` unless SiliconFlow)."""
+    return SILICONFLOW_DEFAULT_ASR_MODEL if _is_siliconflow_vendor(vendor) else "whisper-1"
+

 # ============ ASR Models CRUD ============
@router.get("")
@@ -219,3 +230,134 @@ def transcribe_audio(

     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/{id}/preview", response_model=ASRTestResponse)
+async def preview_asr_model(
+    id: str,
+    file: UploadFile = File(...),
+    language: Optional[str] = Form(None),
+    api_key: Optional[str] = Form(None),
+    db: Session = Depends(get_db),
+):
+    """Preview an ASR model by transcribing one uploaded audio file.
+
+    The upload is forwarded to ``{base_url}/audio/transcriptions`` on the
+    model's OpenAI-compatible endpoint and the reply is normalised into an
+    ``ASRTestResponse``.
+
+    Args:
+        id: Primary key of the stored ``ASRModel``.
+        file: Audio upload; its content type must start with ``audio/``.
+        language: Optional language hint forwarded to the vendor when non-blank.
+        api_key: Optional API key that takes precedence over the stored key.
+        db: Database session provided by the ``get_db`` dependency.
+
+    Returns:
+        ``ASRTestResponse`` whose ``success`` flag is true only when the vendor
+        reply contained a non-empty transcript.
+
+    Raises:
+        HTTPException: 404 if the model does not exist; 400 for a non-audio or
+            empty upload or a missing API key / base URL; 502 if the vendor
+            request fails or returns a non-200 status.
+    """
+    # Imported locally so this handler does not rely on a module-level import.
+    from fastapi.concurrency import run_in_threadpool
+
+    model = db.query(ASRModel).filter(ASRModel.id == id).first()
+    if not model:
+        raise HTTPException(status_code=404, detail="ASR Model not found")
+
+    # No "file missing" branch: ``File(...)`` makes the part mandatory, so
+    # FastAPI rejects the request before this handler runs.
+    filename = file.filename or "preview.wav"
+    content_type = file.content_type or "application/octet-stream"
+    if not content_type.startswith("audio/"):
+        raise HTTPException(status_code=400, detail="Only audio files are supported")
+
+    audio_bytes = await file.read()
+    if not audio_bytes:
+        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")
+
+    # Key precedence: request override, then the stored key, then (SiliconFlow
+    # only) the SILICONFLOW_API_KEY environment variable.
+    effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
+    if not effective_api_key and _is_siliconflow_vendor(model.vendor):
+        effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
+    if not effective_api_key:
+        raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")
+
+    base_url = (model.base_url or "").strip().rstrip("/")
+    if not base_url:
+        raise HTTPException(status_code=400, detail=f"Base URL is required for ASR model: {model.name}")
+
+    selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
+    data = {"model": selected_model}
+    effective_language = (language or "").strip() or None
+    if effective_language:
+        data["language"] = effective_language
+    if model.hotwords:
+        # Hotwords are sent through the transcription request's ``prompt`` field.
+        data["prompt"] = " ".join(model.hotwords)
+
+    headers = {"Authorization": f"Bearer {effective_api_key}"}
+    files = {"file": (filename, audio_bytes, content_type)}
+
+    def _post_transcription() -> httpx.Response:
+        """Send the blocking multipart request and return the vendor reply."""
+        with httpx.Client(timeout=90.0) as client:
+            return client.post(
+                f"{base_url}/audio/transcriptions",
+                headers=headers,
+                data=data,
+                files=files,
+            )
+
+    # perf_counter is monotonic, so a wall-clock adjustment cannot skew latency.
+    start_time = time.perf_counter()
+    try:
+        # The synchronous client would stall the event loop for up to 90 s if
+        # called directly from this coroutine, so it runs in a worker thread.
+        response = await run_in_threadpool(_post_transcription)
+    except Exception as exc:
+        raise HTTPException(status_code=502, detail=f"ASR request failed: {exc}") from exc
+
+    if response.status_code != 200:
+        detail = response.text
+        try:
+            detail_json = response.json()
+            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
+        except Exception:
+            # Best effort: keep the raw body when it is not the expected JSON.
+            pass
+        raise HTTPException(status_code=502, detail=f"ASR vendor error: {detail}")
+
+    try:
+        payload = response.json()
+    except Exception:
+        # A non-JSON success body is treated as the transcript itself.
+        payload = {"text": response.text}
+
+    transcript = ""
+    response_language = model.language
+    confidence = None
+    if isinstance(payload, dict):
+        transcript = str(payload.get("text") or payload.get("transcript") or "")
+        response_language = str(payload.get("language") or effective_language or model.language)
+        raw_confidence = payload.get("confidence")
+        if raw_confidence is not None:
+            try:
+                confidence = float(raw_confidence)
+            except (TypeError, ValueError):
+                confidence = None
+
+    latency_ms = int((time.perf_counter() - start_time) * 1000)
+    return ASRTestResponse(
+        success=bool(transcript),
+        transcript=transcript,
+        language=response_language,
+        confidence=confidence,
+        latency_ms=latency_ms,
+        message=None if transcript else "No transcript in response",
+    )
--- a/api/tests/test_asr.py
+++ b/api/tests/test_asr.py
@@ -287,3 +287,61 @@ class TestASRModelAPI:
            response = client.post("/api/asr", json=data)
            assert response.status_code == 200
            assert response.json()["vendor"] == vendor
+
+    def test_preview_asr_model_success(self, client, sample_asr_model_data, monkeypatch):
+        """Preview returns the vendor transcript when the outbound httpx client is stubbed."""
+        from app.routers import asr as asr_router
+
+        create_response = client.post("/api/asr", json=sample_asr_model_data)
+        model_id = create_response.json()["id"]
+
+        class DummyResponse:  # canned successful vendor reply
+            status_code = 200
+
+            def json(self):
+                return {"text": "你好，这是测试转写", "language": "zh", "confidence": 0.98}
+
+            @property
+            def text(self):
+                return '{"text":"ok"}'
+
+        class DummyClient:  # context-manager stand-in for httpx.Client; performs no network I/O
+            def __init__(self, *args, **kwargs):
+                pass
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def post(self, url, headers=None, data=None, files=None):  # checks the outbound request, then replies
+                assert url.endswith("/audio/transcriptions")
+                assert headers["Authorization"] == f"Bearer {sample_asr_model_data['api_key']}"
+                assert data["model"] == sample_asr_model_data["model_name"]
+                assert files["file"][0] == "sample.wav"
+                return DummyResponse()
+
+        monkeypatch.setattr(asr_router.httpx, "Client", DummyClient)  # swap the client the router instantiates
+
+        response = client.post(
+            f"/api/asr/{model_id}/preview",
+            files={"file": ("sample.wav", b"fake-wav-bytes", "audio/wav")},
+        )
+        assert response.status_code == 200
+        payload = response.json()
+        assert payload["success"] is True
+        assert payload["transcript"] == "你好，这是测试转写"
+        assert payload["language"] == "zh"
+
+    def test_preview_asr_model_reject_non_audio(self, client, sample_asr_model_data):
+        """Preview answers 400 when the uploaded part is not declared as audio/*."""
+        create_response = client.post("/api/asr", json=sample_asr_model_data)
+        model_id = create_response.json()["id"]
+
+        response = client.post(
+            f"/api/asr/{model_id}/preview",
+            files={"file": ("sample.txt", b"text-data", "text/plain")},  # text/plain content type on purpose
+        )
+        assert response.status_code == 400
+        assert "Only audio files are supported" in response.text
--- a/web/pages/ASRLibrary.tsx
+++ b/web/pages/ASRLibrary.tsx
@@ -1,61 +1,78 @@
-
-import React, { useState } from 'react';
-import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages } from 'lucide-react';
+import React, { useEffect, useRef, useState } from 'react';
+import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react';
 import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
-import { mockASRModels } from '../services/mockData';
 import { ASRModel } from '../types';
+import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi';
+
+// Keep only the first 3 and last 4 characters; missing or short (< 8) keys are fully masked.
+const maskApiKey = (key?: string) => {
+  if (!key || key.length < 8) return '********';
+  return `${key.slice(0, 3)}****${key.slice(-4)}`;
+};
+
+// Split a newline- or comma-separated string into trimmed, non-empty hotwords.
+const parseHotwords = (value: string): string[] => {
+  const isNonEmpty = (piece: string) => piece.length > 0;
+  const pieces = value.split(/[\n,]/).map((piece) => piece.trim());
+  return pieces.filter(isNonEmpty);
+};
+
+const toHotwordsValue = (hotwords?: string[]): string => (hotwords || []).join(', '); // comma-joined text for the form field; '' when unset

 export const ASRLibraryPage: React.FC = () => {
-  const [models, setModels] = useState<ASRModel[]>(mockASRModels);
+  const [models, setModels] = useState<ASRModel[]>([]);
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('all');
  const [langFilter, setLangFilter] = useState<string>('all');
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
+  const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
+  const [previewingModel, setPreviewingModel] = useState<ASRModel | null>(null);
+  const [isLoading, setIsLoading] = useState(true);

-  // Form State
-  const [newModel, setNewModel] = useState<Partial<ASRModel>>({
-    vendor: 'OpenAI Compatible',
-    language: 'zh'
-  });
+  const loadModels = async () => {
+    setIsLoading(true);
+    try {
+      setModels(await fetchASRModels());
+    } catch (error) {
+      console.error(error);
+      setModels([]);
+    } finally {
+      setIsLoading(false);
+    }
+  };

-  const filteredModels = models.filter(m => {
-    const matchesSearch = m.name.toLowerCase().includes(searchTerm.toLowerCase());
+  useEffect(() => {
+    loadModels();
+  }, []);
+
+  const filteredModels = models.filter((m) => {
+    const q = searchTerm.toLowerCase();
+    const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q);
    const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter;
    const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual');
    return matchesSearch && matchesVendor && matchesLang;
  });

-  const handleAddModel = () => {
-    if (!newModel.name || !newModel.baseUrl || !newModel.apiKey) {
-      alert("请填写完整信息");
-      return;
-    }
-    
-    const model: ASRModel = {
-      id: `asr_${Date.now()}`,
-      name: newModel.name,
-      vendor: newModel.vendor as 'OpenAI Compatible',
-      language: newModel.language || 'zh',
-      baseUrl: newModel.baseUrl,
-      apiKey: newModel.apiKey
-    };
-
-    setModels([model, ...models]);
+  const handleCreate = async (data: Partial<ASRModel>) => {
+    const created = await createASRModel(data);
+    setModels((prev) => [created, ...prev]);
    setIsAddModalOpen(false);
-    setNewModel({ vendor: 'OpenAI Compatible', language: 'zh', name: '', baseUrl: '', apiKey: '' });
  };

-  const handleDeleteModel = (id: string) => {
-    if (confirm('确认删除该语音识别模型吗？')) {
-      setModels(prev => prev.filter(m => m.id !== id));
-    }
+  const handleUpdate = async (id: string, data: Partial<ASRModel>) => {
+    const updated = await updateASRModel(id, data);
+    setModels((prev) => prev.map((m) => (m.id === id ? updated : m)));
+    setEditingModel(null);
  };

-  const maskApiKey = (key: string) => {
-    if (!key || key.length < 8) return '********';
-    return `${key.substring(0, 3)}****${key.substring(key.length - 4)}`;
+  const handleDelete = async (id: string) => {
+    if (!confirm('确认删除该语音识别模型吗？')) return;
+    await deleteASRModel(id);
+    setModels((prev) => prev.filter((m) => m.id !== id));
  };

+  const vendorOptions = Array.from(new Set(models.map((m) => m.vendor).filter(Boolean)));
+
  return (
    <div className="space-y-6 animate-in fade-in py-4 pb-10">
      <div className="flex items-center justify-between">
@@ -69,10 +86,10 @@ export const ASRLibraryPage: React.FC = () => {
        <div className="relative col-span-1 md:col-span-2">
          <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
          <Input
-                placeholder="搜索模型名称..." 
+            placeholder="搜索模型名称/Model Name..."
            className="pl-9 border-0 bg-white/5"
            value={searchTerm}
-                onChange={e => setSearchTerm(e.target.value)}
+            onChange={(e) => setSearchTerm(e.target.value)}
          />
        </div>
        <div className="flex items-center space-x-2">
@@ -82,8 +99,10 @@ export const ASRLibraryPage: React.FC = () => {
            value={vendorFilter}
            onChange={(e) => setVendorFilter(e.target.value)}
          >
-                <option value="all">所有接口类型</option>
-                <option value="OpenAI Compatible">OpenAI Compatible</option>
+            <option value="all">所有厂商</option>
+            {vendorOptions.map((vendor) => (
+              <option key={vendor} value={vendor}>{vendor}</option>
+            ))}
          </select>
        </div>
        <div className="flex items-center space-x-2">
@@ -105,131 +124,435 @@ export const ASRLibraryPage: React.FC = () => {
          <TableHeader>
            <TableRow>
              <TableHead>模型名称</TableHead>
-              <TableHead>接口类型</TableHead>
+              <TableHead>厂商</TableHead>
              <TableHead>语言</TableHead>
+              <TableHead>模型标识</TableHead>
              <TableHead>Base URL</TableHead>
              <TableHead>API Key</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
-            {filteredModels.map(model => (
+            {!isLoading && filteredModels.map((model) => (
              <TableRow key={model.id}>
-                <TableCell className="font-medium text-white flex items-center">
+                <TableCell className="font-medium text-white">
+                  <div className="flex flex-col">
+                    <span className="flex items-center">
                      <Ear className="w-4 h-4 mr-2 text-primary" />
                      {model.name}
+                    </span>
+                    {model.hotwords && model.hotwords.length > 0 && (
+                      <span className="text-xs text-muted-foreground">热词: {model.hotwords.join(', ')}</span>
+                    )}
+                  </div>
                </TableCell>
-                <TableCell>
-                    <Badge variant="outline">{model.vendor}</Badge>
-                </TableCell>
-                <TableCell>
-                    <Badge variant="default" className="bg-purple-500/10 text-purple-400 border-purple-500/20">
-                        {model.language}
-                    </Badge>
-                </TableCell>
-                <TableCell className="font-mono text-xs text-muted-foreground">
-                    {model.baseUrl}
-                </TableCell>
-                <TableCell className="font-mono text-xs text-muted-foreground">
-                    {maskApiKey(model.apiKey)}
-                </TableCell>
+                <TableCell><Badge variant="outline">{model.vendor}</Badge></TableCell>
+                <TableCell>{model.language}</TableCell>
+                <TableCell className="font-mono text-xs text-muted-foreground">{model.modelName || '-'}</TableCell>
+                <TableCell className="font-mono text-xs text-muted-foreground max-w-[220px] truncate">{model.baseUrl}</TableCell>
+                <TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
                <TableCell className="text-right">
-                    <Button 
-                        variant="ghost" 
-                        size="icon" 
-                        onClick={() => handleDeleteModel(model.id)}
-                        className="text-muted-foreground hover:text-destructive transition-colors"
-                    >
+                  <Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)}>
+                    <Ear className="h-4 w-4" />
+                  </Button>
+                  <Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
+                    <Pencil className="h-4 w-4" />
+                  </Button>
+                  <Button variant="ghost" size="icon" onClick={() => handleDelete(model.id)} className="text-red-400">
                    <Trash2 className="h-4 w-4" />
                  </Button>
                </TableCell>
              </TableRow>
            ))}
-             {filteredModels.length === 0 && (
+            {!isLoading && filteredModels.length === 0 && (
              <TableRow>
-                     <TableCell colSpan={6} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
+                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
+              </TableRow>
+            )}
+            {isLoading && (
+              <TableRow>
+                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">加载中...</TableCell>
              </TableRow>
            )}
          </tbody>
        </table>
      </div>

-      <Dialog
+      <ASRModelModal
        isOpen={isAddModalOpen}
        onClose={() => setIsAddModalOpen(false)}
-        title="添加语音识别模型"
+        onSubmit={handleCreate}
+      />
+
+      <ASRModelModal
+        isOpen={!!editingModel}
+        onClose={() => setEditingModel(null)}
+        onSubmit={(data) => handleUpdate(editingModel!.id, data)}
+        initialModel={editingModel || undefined}
+      />
+
+      <ASRPreviewModal
+        isOpen={!!previewingModel}
+        onClose={() => setPreviewingModel(null)}
+        model={previewingModel}
+      />
+    </div>
+  );
+};
+
+const ASRModelModal: React.FC<{
+  isOpen: boolean;
+  onClose: () => void;
+  onSubmit: (model: Partial<ASRModel>) => Promise<void>;
+  initialModel?: ASRModel;
+}> = ({ isOpen, onClose, onSubmit, initialModel }) => {
+  const [name, setName] = useState('');
+  const [vendor, setVendor] = useState('OpenAI Compatible');
+  const [language, setLanguage] = useState('zh');
+  const [modelName, setModelName] = useState('FunAudioLLM/SenseVoiceSmall');
+  const [baseUrl, setBaseUrl] = useState('https://api.siliconflow.cn/v1');
+  const [apiKey, setApiKey] = useState('');
+  const [hotwords, setHotwords] = useState('');
+  const [enablePunctuation, setEnablePunctuation] = useState(true);
+  const [enableNormalization, setEnableNormalization] = useState(true);
+  const [enabled, setEnabled] = useState(true);
+  const [saving, setSaving] = useState(false);
+
+  useEffect(() => {
+    if (!isOpen) return;
+    if (initialModel) {
+      setName(initialModel.name || '');
+      setVendor(initialModel.vendor || 'OpenAI Compatible');
+      setLanguage(initialModel.language || 'zh');
+      setModelName(initialModel.modelName || 'FunAudioLLM/SenseVoiceSmall');
+      setBaseUrl(initialModel.baseUrl || 'https://api.siliconflow.cn/v1');
+      setApiKey(initialModel.apiKey || '');
+      setHotwords(toHotwordsValue(initialModel.hotwords));
+      setEnablePunctuation(initialModel.enablePunctuation ?? true);
+      setEnableNormalization(initialModel.enableNormalization ?? true);
+      setEnabled(initialModel.enabled ?? true);
+      return;
+    }
+
+    setName('');
+    setVendor('OpenAI Compatible');
+    setLanguage('zh');
+    setModelName('FunAudioLLM/SenseVoiceSmall');
+    setBaseUrl('https://api.siliconflow.cn/v1');
+    setApiKey('');
+    setHotwords('');
+    setEnablePunctuation(true);
+    setEnableNormalization(true);
+    setEnabled(true);
+  }, [initialModel, isOpen]);
+
+  const handleSubmit = async () => {
+    if (!name.trim()) {
+      alert('请填写模型名称');
+      return;
+    }
+    if (!baseUrl.trim()) {
+      alert('请填写 Base URL');
+      return;
+    }
+    if (!apiKey.trim()) {
+      alert('请填写 API Key');
+      return;
+    }
+
+    try {
+      setSaving(true);
+      await onSubmit({
+        name: name.trim(),
+        vendor: vendor.trim(),
+        language,
+        modelName: modelName.trim(),
+        baseUrl: baseUrl.trim(),
+        apiKey: apiKey.trim(),
+        hotwords: parseHotwords(hotwords),
+        enablePunctuation,
+        enableNormalization,
+        enabled,
+      });
+    } catch (error: any) {
+      alert(error?.message || '保存失败');
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  return (
+    <Dialog
+      isOpen={isOpen}
+      onClose={onClose}
+      title={initialModel ? '编辑语音识别模型' : '添加语音识别模型'}
      footer={
        <>
-            <Button variant="ghost" onClick={() => setIsAddModalOpen(false)}>取消</Button>
-            <Button onClick={handleAddModel}>确认添加</Button>
+          <Button variant="ghost" onClick={onClose}>取消</Button>
+          <Button onClick={handleSubmit} disabled={saving}>{saving ? '保存中...' : (initialModel ? '保存修改' : '确认添加')}</Button>
+        </>
+      }
+    >
+      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
+        <div className="space-y-1.5">
+          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">模型名称</label>
+          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: SenseVoice CN" />
+        </div>
+
+        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
+          <div className="space-y-1.5">
+            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">接口类型</label>
+            <select
+              className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
+              value={vendor}
+              onChange={(e) => setVendor(e.target.value)}
+            >
+              <option value="OpenAI Compatible">OpenAI Compatible</option>
+              <option value="SiliconFlow">SiliconFlow</option>
+            </select>
+          </div>
+          <div className="space-y-1.5">
+            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Languages className="w-3 h-3 mr-1.5" />语言</label>
+            <select
+              className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
+              value={language}
+              onChange={(e) => setLanguage(e.target.value)}
+            >
+              <option value="zh">中文 (Chinese)</option>
+              <option value="en">英文 (English)</option>
+              <option value="Multi-lingual">多语言 (Multi-lingual)</option>
+            </select>
+          </div>
+        </div>
+
+        <div className="space-y-1.5">
+          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Model Name</label>
+          <Input value={modelName} onChange={(e) => setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
+        </div>
+
+        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
+          <div className="space-y-1.5">
+            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Server className="w-3 h-3 mr-1.5" />Base URL</label>
+            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
+          </div>
+          <div className="space-y-1.5">
+            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center"><Key className="w-3 h-3 mr-1.5" />API Key</label>
+            <Input value={apiKey} onChange={(e) => setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
+          </div>
+        </div>
+
+        <div className="space-y-1.5">
+          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">热词 (comma separated)</label>
+          <Input value={hotwords} onChange={(e) => setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
+        </div>
+
+        <div className="grid grid-cols-1 md:grid-cols-3 gap-2">
+          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
+            <input type="checkbox" checked={enablePunctuation} onChange={(e) => setEnablePunctuation(e.target.checked)} />
+            <span>标点增强</span>
+          </label>
+          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
+            <input type="checkbox" checked={enableNormalization} onChange={(e) => setEnableNormalization(e.target.checked)} />
+            <span>文本归一化</span>
+          </label>
+          <label className="flex items-center space-x-2 text-xs text-muted-foreground">
+            <input type="checkbox" checked={enabled} onChange={(e) => setEnabled(e.target.checked)} />
+            <span>启用</span>
+          </label>
+        </div>
+      </div>
+    </Dialog>
+  );
+};
+
+const ASRPreviewModal: React.FC<{
+  isOpen: boolean;
+  onClose: () => void;
+  model: ASRModel | null;
+}> = ({ isOpen, onClose, model }) => {
+  const [selectedFile, setSelectedFile] = useState<File | null>(null);
+  const [isDragging, setIsDragging] = useState(false);
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const [transcript, setTranscript] = useState('');
+  const [latency, setLatency] = useState<number | null>(null);
+  const [confidence, setConfidence] = useState<number | null>(null);
+  const [language, setLanguage] = useState('');
+  const [isRecording, setIsRecording] = useState(false);
+
+  const inputRef = useRef<HTMLInputElement>(null);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const chunksRef = useRef<Blob[]>([]);
+
+  useEffect(() => {
+    if (!isOpen) return;
+    setSelectedFile(null);
+    setTranscript('');
+    setLatency(null);
+    setConfidence(null);
+    setLanguage(model?.language || '');
+    setIsTranscribing(false);
+    setIsRecording(false);
+  }, [isOpen, model]);
+
+  useEffect(() => {
+    return () => {
+      if (streamRef.current) {
+        streamRef.current.getTracks().forEach((track) => track.stop());
+      }
+    };
+  }, []);
+
+  const pickFile = (file: File | null) => {
+    if (!file) return;
+    if (!file.type.startsWith('audio/')) {
+      alert('仅支持音频文件');
+      return;
+    }
+    setSelectedFile(file);
+  };
+
+  const handleDrop = (event: React.DragEvent<HTMLDivElement>) => {
+    event.preventDefault();
+    setIsDragging(false);
+    const file = event.dataTransfer.files?.[0] || null;
+    pickFile(file);
+  };
+
+  const startRecording = async () => {
+    if (!navigator.mediaDevices?.getUserMedia) {
+      alert('当前浏览器不支持麦克风录音');
+      return;
+    }
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      const recorder = new MediaRecorder(stream);
+
+      chunksRef.current = [];
+      streamRef.current = stream;
+      mediaRecorderRef.current = recorder;
+
+      recorder.ondataavailable = (event) => {
+        if (event.data.size > 0) {
+          chunksRef.current.push(event.data);
+        }
+      };
+
+      recorder.onstop = () => {
+        const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' });
+        const file = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
+        setSelectedFile(file);
+        if (streamRef.current) {
+          streamRef.current.getTracks().forEach((track) => track.stop());
+          streamRef.current = null;
+        }
+      };
+
+      recorder.start();
+      setIsRecording(true);
+    } catch (error: any) {
+      alert(error?.message || '无法访问麦克风');
+    }
+  };
+
+  const stopRecording = () => {
+    if (!mediaRecorderRef.current) return;
+    mediaRecorderRef.current.stop();
+    setIsRecording(false);
+  };
+
+  const runPreview = async () => {
+    if (!model?.id) return;
+    if (!selectedFile) {
+      alert('请先上传或录制音频');
+      return;
+    }
+
+    try {
+      setIsTranscribing(true);
+      const result = await previewASRModel(model.id, selectedFile, { language: language || undefined });
+      setTranscript(result.transcript || result.message || '无识别内容');
+      setLatency(result.latency_ms ?? null);
+      setConfidence(result.confidence ?? null);
+    } catch (error: any) {
+      alert(error?.message || '识别失败');
+    } finally {
+      setIsTranscribing(false);
+    }
+  };
+
+  return (
+    <Dialog
+      isOpen={isOpen}
+      onClose={onClose}
+      title={`试听识别: ${model?.name || ''}`}
+      footer={
+        <>
+          <Button variant="ghost" onClick={onClose}>关闭</Button>
+          <Button onClick={runPreview} disabled={isTranscribing || !selectedFile}>
+            {isTranscribing ? '识别中...' : '开始识别'}
+          </Button>
        </>
      }
    >
      <div className="space-y-4">
-          <div className="space-y-1.5">
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">接口类型 (Interface Type)</label>
-            <select 
-                className="flex h-10 w-full rounded-md border border-white/10 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground appearance-none cursor-pointer [&>option]:bg-card"
-                value={newModel.vendor}
-                onChange={e => setNewModel({...newModel, vendor: e.target.value as any})}
+        <div
+          className={`rounded-lg border-2 border-dashed p-4 transition-colors ${isDragging ? 'border-primary bg-primary/10' : 'border-white/10 bg-white/5'}`}
+          onDragOver={(e) => {
+            e.preventDefault();
+            setIsDragging(true);
+          }}
+          onDragLeave={() => setIsDragging(false)}
+          onDrop={handleDrop}
        >
-                <option value="OpenAI Compatible">OpenAI Compatible</option>
-            </select>
-          </div>
-
-          <div className="space-y-1.5">
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">支持语言 (Language)</label>
-            <div className="flex bg-white/5 p-1 rounded-lg border border-white/10">
-                {(['zh', 'en', 'Multi-lingual'] as const).map(l => (
-                    <button 
-                        key={l}
-                        onClick={() => setNewModel({...newModel, language: l})}
-                        className={`flex-1 flex items-center justify-center py-1.5 text-xs font-bold rounded-md transition-all ${newModel.language === l ? 'bg-primary text-primary-foreground shadow-lg' : 'text-muted-foreground hover:text-foreground'}`}
-                    >
-                        {l === 'zh' && <span className="mr-1">🇨🇳</span>}
-                        {l === 'en' && <span className="mr-1">🇺🇸</span>}
-                        {l === 'Multi-lingual' && <Globe className="w-3 h-3 mr-1.5" />}
-                        {l === 'zh' ? '中文' : l === 'en' ? '英文' : '多语言'}
-                    </button>
-                ))}
-            </div>
-          </div>
-
-          <div className="space-y-1.5">
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">模型名称 (Model Name)</label>
-            <Input 
-                value={newModel.name} 
-                onChange={e => setNewModel({...newModel, name: e.target.value})} 
-                placeholder="例如: whisper-1, funasr" 
+          <input
+            ref={inputRef}
+            type="file"
+            accept="audio/*"
+            className="hidden"
+            onChange={(e) => pickFile(e.target.files?.[0] || null)}
          />
+          <div className="flex flex-col items-center justify-center gap-2 text-sm text-muted-foreground">
+            <Upload className="h-6 w-6 text-primary" />
+            <p>拖拽音频文件到这里，或</p>
+            <Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}>选择文件</Button>
+            {selectedFile && <p className="text-primary text-xs">已选择: {selectedFile.name}</p>}
+          </div>
+        </div>
+
+        <div className="flex items-center justify-between rounded-lg border border-white/10 bg-white/5 p-3">
+          <div className="text-sm text-muted-foreground">麦克风测试</div>
+          {!isRecording ? (
+            <Button size="sm" variant="outline" onClick={startRecording}><Mic className="h-4 w-4 mr-1" />开始录音</Button>
+          ) : (
+            <Button size="sm" variant="destructive" onClick={stopRecording}><Square className="h-4 w-4 mr-1" />停止录音</Button>
+          )}
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
-                <Server className="w-3 h-3 mr-1.5" /> Base URL
+            <Globe className="w-3 h-3 mr-1.5" />识别语言 (Optional)
          </label>
-            <Input 
-                value={newModel.baseUrl} 
-                onChange={e => setNewModel({...newModel, baseUrl: e.target.value})} 
-                placeholder="https://api.openai.com/v1" 
-                className="font-mono text-xs"
-            />
+          <Input value={language} onChange={(e) => setLanguage(e.target.value)} placeholder="zh / en / auto" />
        </div>

-          <div className="space-y-1.5">
-            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
-                <Key className="w-3 h-3 mr-1.5" /> API Key
-            </label>
-            <Input 
-                type="password"
-                value={newModel.apiKey} 
-                onChange={e => setNewModel({...newModel, apiKey: e.target.value})} 
-                placeholder="sk-..." 
-                className="font-mono text-xs"
+        <div className="rounded-lg border border-primary/20 bg-primary/5 p-3 space-y-2">
+          <div className="flex items-center justify-between text-xs text-primary">
+            <span>识别结果</span>
+            <span>
+              {latency !== null ? `Latency: ${latency}ms` : ''}
+              {confidence !== null ? `  Confidence: ${confidence.toFixed(3)}` : ''}
+            </span>
+          </div>
+          <textarea
+            readOnly
+            value={transcript}
+            className="flex min-h-[120px] w-full rounded-md border-0 bg-black/20 px-3 py-2 text-sm shadow-sm text-white"
+            placeholder="识别结果会显示在这里"
          />
        </div>
      </div>
    </Dialog>
-    </div>
  );
 };
--- a/web/services/backendApi.ts
+++ b/web/services/backendApi.ts
@@ -1,4 +1,4 @@
-import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
+import { ASRModel, Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
 import { apiRequest } from './apiClient';

 type AnyRecord = Record<string, any>;
@@ -64,6 +64,20 @@ const mapVoice = (raw: AnyRecord): Voice => ({
  isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
 });

+/**
+ * Converts a raw ASR model record into the frontend `ASRModel` shape.
+ * Each field is looked up through `readField` with its candidate key names
+ * (camelCase and snake_case where both are listed) and a fallback value.
+ */
+const mapASRModel = (raw: AnyRecord): ASRModel => {
+  const id = String(readField(raw, ['id'], ''));
+  const name = readField(raw, ['name'], '');
+  const vendor = readField(raw, ['vendor'], 'OpenAI Compatible');
+  const language = readField(raw, ['language'], 'zh');
+  const baseUrl = readField(raw, ['baseUrl', 'base_url'], '');
+  const apiKey = readField(raw, ['apiKey', 'api_key'], '');
+  const modelName = readField(raw, ['modelName', 'model_name'], '');
+  const hotwords = readField(raw, ['hotwords'], []);
+  // The three flags use `true` as their fallback and are coerced with Boolean().
+  const enablePunctuation = Boolean(readField(raw, ['enablePunctuation', 'enable_punctuation'], true));
+  const enableNormalization = Boolean(readField(raw, ['enableNormalization', 'enable_normalization'], true));
+  const enabled = Boolean(readField(raw, ['enabled'], true));
+
+  return {
+    id,
+    name,
+    vendor,
+    language,
+    baseUrl,
+    apiKey,
+    modelName,
+    hotwords,
+    enablePunctuation,
+    enableNormalization,
+    enabled,
+  };
+};
+
 const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({
  name: readField(raw, ['name'], ''),
  type: readField(raw, ['type'], 'conversation') as 'conversation' | 'tool' | 'human' | 'end',
@@ -246,6 +260,97 @@ export const previewVoice = async (id: string, text: string, speed?: number, api
  return response.audio_url;
 };

+/**
+ * Loads the ASR models from `/asr` and maps each entry with `mapASRModel`.
+ * The response may be either a bare array or an object carrying the array in `list`;
+ * a missing `list` yields an empty result.
+ */
+export const fetchASRModels = async (): Promise<ASRModel[]> => {
+  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/asr');
+  let rows: AnyRecord[];
+  if (Array.isArray(response)) {
+    rows = response;
+  } else {
+    rows = response.list || [];
+  }
+  return rows.map((row) => mapASRModel(row));
+};
+
+/**
+ * Creates an ASR model via `POST /asr` and returns the mapped response.
+ *
+ * Falsy text fields fall back to defaults: name 'New ASR Model', vendor 'OpenAI Compatible',
+ * language 'zh', base URL 'https://api.siliconflow.cn/v1', empty API key, empty hotword list.
+ * `id` and `modelName` are sent as `undefined` when falsy. The three boolean flags default to
+ * `true` only when they are null or undefined.
+ */
+export const createASRModel = async (data: Partial<ASRModel>): Promise<ASRModel> => {
+  const {
+    id, name, vendor, language, baseUrl, apiKey, modelName, hotwords,
+    enablePunctuation, enableNormalization, enabled,
+  } = data;
+
+  // Request body uses snake_case field names.
+  const requestBody = {
+    id: id || undefined,
+    name: name || 'New ASR Model',
+    vendor: vendor || 'OpenAI Compatible',
+    language: language || 'zh',
+    base_url: baseUrl || 'https://api.siliconflow.cn/v1',
+    api_key: apiKey || '',
+    model_name: modelName || undefined,
+    hotwords: hotwords || [],
+    enable_punctuation: enablePunctuation ?? true,
+    enable_normalization: enableNormalization ?? true,
+    enabled: enabled ?? true,
+  };
+
+  const created = await apiRequest<AnyRecord>('/asr', { method: 'POST', body: requestBody });
+  return mapASRModel(created);
+};
+
+/**
+ * Updates an ASR model via `PUT /asr/{id}` and returns the mapped response.
+ * Fields are renamed from camelCase to the snake_case names used in the request body;
+ * fields absent from `data` are passed along as `undefined` (no defaults are applied).
+ */
+export const updateASRModel = async (id: string, data: Partial<ASRModel>): Promise<ASRModel> => {
+  const {
+    name, vendor, language, baseUrl, apiKey, modelName, hotwords,
+    enablePunctuation, enableNormalization, enabled,
+  } = data;
+
+  const requestBody = {
+    name,
+    vendor,
+    language,
+    base_url: baseUrl,
+    api_key: apiKey,
+    model_name: modelName,
+    hotwords,
+    enable_punctuation: enablePunctuation,
+    enable_normalization: enableNormalization,
+    enabled,
+  };
+
+  const updated = await apiRequest<AnyRecord>(`/asr/${id}`, { method: 'PUT', body: requestBody });
+  return mapASRModel(updated);
+};
+
+/** Deletes the ASR model with the given id via `DELETE /asr/{id}`; resolves with no value. */
+export const deleteASRModel = async (id: string): Promise<void> => {
+  const endpoint = `/asr/${id}`;
+  await apiRequest(endpoint, { method: 'DELETE' });
+};
+
+/**
+ * Result object returned by `previewASRModel`.
+ * NOTE(review): the per-field notes marked "presumably" are inferred from the field names;
+ * confirm them against the backend response schema.
+ */
+export type ASRPreviewResult = {
+  success: boolean;
+  transcript?: string; // presumably the recognised text
+  language?: string;
+  confidence?: number;
+  latency_ms?: number; // presumably the request latency in milliseconds
+  message?: string;
+  error?: string; // also set client-side to 'Invalid preview response' when an OK response has no usable JSON body
+};
+
+/**
+ * Uploads an audio file to `POST /asr/{id}/preview` as multipart form data and
+ * returns the parsed preview result.
+ *
+ * The request is sent with `fetch` directly and without an explicit `Content-Type`
+ * header, leaving the multipart boundary to be generated for the `FormData` body.
+ *
+ * @param id       ASR model id; percent-encoded before being placed in the URL path.
+ * @param file     Audio file sent as the `file` form field.
+ * @param options  Optional `language` and `apiKey` (sent as `api_key`); falsy values are omitted.
+ * @returns The decoded JSON body, or `{ success: false, error: 'Invalid preview response' }`
+ *          when an OK response carries no usable JSON.
+ * @throws Error when the HTTP status is not OK. The message comes from the body's `error`
+ *         or `detail` field (a string, or a list of `{ msg }` validation entries) and falls
+ *         back to `Request failed: <status>`.
+ */
+export const previewASRModel = async (
+  id: string,
+  file: File,
+  options?: { language?: string; apiKey?: string }
+): Promise<ASRPreviewResult> => {
+  const formData = new FormData();
+  formData.append('file', file);
+  if (options?.language) {
+    formData.append('language', options.language);
+  }
+  if (options?.apiKey) {
+    formData.append('api_key', options.apiKey);
+  }
+
+  const base = (import.meta.env.VITE_API_BASE_URL || 'http://127.0.0.1:8100/api').replace(/\/+$/, '');
+  // Encode the id so characters such as `?`, `#` or spaces cannot alter the request URL.
+  const url = `${base}/asr/${encodeURIComponent(id)}/preview`;
+  const response = await fetch(url, {
+    method: 'POST',
+    body: formData,
+  });
+
+  let data: AnyRecord | null = null;
+  try {
+    data = await response.json();
+  } catch {
+    // Non-JSON or empty body: continue with `data` left as null.
+    data = null;
+  }
+
+  if (!response.ok) {
+    const fallback = `Request failed: ${response.status}`;
+    const detail = data?.error || data?.detail;
+    if (typeof detail === 'string' && detail !== '') {
+      throw new Error(detail);
+    }
+    if (Array.isArray(detail)) {
+      // Validation failures arrive as a list of `{ msg, ... }` entries; surface their messages
+      // instead of discarding them.
+      const messages = detail
+        .map((entry) => (typeof entry?.msg === 'string' ? entry.msg : ''))
+        .filter(Boolean);
+      throw new Error(messages.length > 0 ? messages.join('; ') : fallback);
+    }
+    throw new Error(fallback);
+  }
+
+  return (data as ASRPreviewResult | null) || { success: false, error: 'Invalid preview response' };
+};
+
 export const fetchWorkflows = async (): Promise<Workflow[]> => {
  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows');
  const list = Array.isArray(response) ? response : (response.list || []);
--- a/web/types.ts
+++ b/web/types.ts
@@ -176,8 +176,13 @@ export interface LLMModel {
 export interface ASRModel {
   id: string;
   name: string;
-  vendor: 'OpenAI Compatible';
+  vendor: string; // widened from the single literal 'OpenAI Compatible' to an arbitrary vendor label
   language: string;
   baseUrl: string;
   apiKey: string;
+  modelName?: string; // optional; NOTE(review): presumably the provider-side model identifier - confirm
+  hotwords?: string[];
+  enablePunctuation?: boolean;
+  enableNormalization?: boolean;
+  enabled?: boolean;
 }