// NOTE(review): This file reached review in a damaged form — every `<...>`
// sequence (JSX tags and TypeScript generic type arguments) appears to have
// been stripped by whatever extracted it: e.g. `useState(null)` was presumably
// `useState<ASRModel | null>(null)`, `Promise` below is missing its type
// argument, and each `return ( ... )` body contains only the text/attribute
// residue of the original JSX. The code therefore cannot compile as-is;
// recover the original markup from version control before making logic edits.
// Comments here only describe what is visible and are inserted only at
// statement-level positions outside any JSX context.
//
// Contents (all on heavily collapsed lines):
//  - maskApiKey: masks an API key for display (keeps first 3 + last 4 chars;
//    keys shorter than 8 chars, or missing, render as "********").
//  - parseHotwords / toHotwordsValue: convert between a comma/newline
//    separated string and a trimmed, non-empty string[] of hotwords.
//  - createAudioContext: AudioContext factory with a webkitAudioContext
//    fallback for older Safari.
//  - encodeWav: hand-writes a 44-byte RIFF/WAVE header via DataView and
//    encodes an AudioBuffer as 16-bit little-endian PCM.
//  - convertRecordedBlobToWav: decodes a recorded blob through a temporary
//    AudioContext and re-encodes it as a .wav File; the context is always
//    closed in `finally`.
//  - ASRLibraryPage: list page with search/vendor/language filters and
//    create/update/delete handlers backed by services/backendApi. Note the
//    language filter also admits 'Multi-lingual' models for any non-'all'
//    language — presumably intentional, verify with product.
//  - ASRModelModal: create/edit dialog; resets or seeds its form state on
//    open, validates name/baseUrl/apiKey, alerts on failure.
//  - ASRPreviewModal: transcription preview with file upload, drag-and-drop,
//    and microphone recording incl. RMS level visualization; its definition
//    continues past the end of this view.
import React, { useEffect, useRef, useState } from 'react'; import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react'; import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI'; import { ASRModel } from '../types'; import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi'; const maskApiKey = (key?: string) => { if (!key) return '********'; if (key.length < 8) return '********'; return `${key.slice(0, 3)}****${key.slice(-4)}`; }; const parseHotwords = (value: string): string[] => { return value .split(/[\n,]/) .map((item) => item.trim()) .filter(Boolean); }; const toHotwordsValue = (hotwords?: string[]): string => (hotwords || []).join(', '); const createAudioContext = (): AudioContext => { const Ctx = (window as any).AudioContext || (window as any).webkitAudioContext; return new Ctx(); }; const encodeWav = (audioBuffer: AudioBuffer): Blob => { const numberOfChannels = audioBuffer.numberOfChannels; const sampleRate = audioBuffer.sampleRate; const format = 1; const bitDepth = 16; const channelData = Array.from({ length: numberOfChannels }, (_, ch) => audioBuffer.getChannelData(ch)); const sampleCount = audioBuffer.length; const blockAlign = numberOfChannels * (bitDepth / 8); const byteRate = sampleRate * blockAlign; const dataSize = sampleCount * blockAlign; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (offset: number, value: string) => { for (let i = 0; i < value.length; i += 1) { view.setUint8(offset + i, value.charCodeAt(i)); } }; writeString(0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(8, 'WAVE'); writeString(12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, format, true); view.setUint16(22, numberOfChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); 
// encodeWav (continued): write the `data` chunk header, then interleave
// samples channel-by-channel — each float sample is clamped to [-1, 1] and
// scaled asymmetrically (0x8000 negative / 0x7fff positive) to signed 16-bit
// little-endian PCM.
view.setUint16(32, blockAlign, true); view.setUint16(34, bitDepth, true); writeString(36, 'data'); view.setUint32(40, dataSize, true); let offset = 44; for (let i = 0; i < sampleCount; i += 1) { for (let ch = 0; ch < numberOfChannels; ch += 1) { const sample = Math.max(-1, Math.min(1, channelData[ch][i])); const pcm = sample < 0 ? sample * 0x8000 : sample * 0x7fff; view.setInt16(offset, pcm, true); offset += 2; } } return new Blob([buffer], { type: 'audio/wav' }); }; const convertRecordedBlobToWav = async (blob: Blob): Promise => { const audioContext = createAudioContext(); try { const inputArrayBuffer = await blob.arrayBuffer(); const decoded = await audioContext.decodeAudioData(inputArrayBuffer.slice(0)); const wavBlob = encodeWav(decoded); return new File([wavBlob], `mic-preview-${Date.now()}.wav`, { type: 'audio/wav' }); } finally { await audioContext.close(); } }; export const ASRLibraryPage: React.FC = () => { const [models, setModels] = useState([]); const [searchTerm, setSearchTerm] = useState(''); const [vendorFilter, setVendorFilter] = useState('OpenAI Compatible'); const [langFilter, setLangFilter] = useState('all'); const [isAddModalOpen, setIsAddModalOpen] = useState(false); const [editingModel, setEditingModel] = useState(null); const [previewingModel, setPreviewingModel] = useState(null); const [isLoading, setIsLoading] = useState(true); const loadModels = async () => { setIsLoading(true); try { setModels(await fetchASRModels()); } catch (error) { console.error(error); setModels([]); } finally { setIsLoading(false); } }; useEffect(() => { loadModels(); }, []); const filteredModels = models.filter((m) => { const q = searchTerm.toLowerCase(); const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q); const matchesVendor = m.vendor === vendorFilter; const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual'); return matchesSearch && 
matchesVendor && matchesLang; }); const handleCreate = async (data: Partial) => { const created = await createASRModel(data); setModels((prev) => [created, ...prev]); setIsAddModalOpen(false); }; const handleUpdate = async (id: string, data: Partial) => { const updated = await updateASRModel(id, data); setModels((prev) => prev.map((m) => (m.id === id ? updated : m))); setEditingModel(null); }; const handleDelete = async (id: string) => { if (!confirm('确认删除该语音识别模型吗?')) return; await deleteASRModel(id); setModels((prev) => prev.filter((m) => m.id !== id)); }; return (

语音识别

setSearchTerm(e.target.value)} />
模型名称 厂商 语言 模型标识 Base URL API Key 操作 {!isLoading && filteredModels.map((model) => (
{model.name} {model.hotwords && model.hotwords.length > 0 && ( 热词: {model.hotwords.join(', ')} )}
{model.vendor} {model.language} {model.modelName || '-'} {model.baseUrl} {maskApiKey(model.apiKey)}
))} {!isLoading && filteredModels.length === 0 && ( 暂无语音识别模型 )} {isLoading && ( 加载中... )}
setIsAddModalOpen(false)} onSubmit={handleCreate} /> setEditingModel(null)} onSubmit={(data) => handleUpdate(editingModel!.id, data)} initialModel={editingModel || undefined} /> setPreviewingModel(null)} model={previewingModel} />
); }; const ASRModelModal: React.FC<{ isOpen: boolean; onClose: () => void; onSubmit: (model: Partial) => Promise; initialModel?: ASRModel; }> = ({ isOpen, onClose, onSubmit, initialModel }) => { const [name, setName] = useState(''); const [vendor, setVendor] = useState('OpenAI Compatible'); const [language, setLanguage] = useState('zh'); const [modelName, setModelName] = useState('FunAudioLLM/SenseVoiceSmall'); const [baseUrl, setBaseUrl] = useState('https://api.siliconflow.cn/v1'); const [apiKey, setApiKey] = useState(''); const [hotwords, setHotwords] = useState(''); const [enablePunctuation, setEnablePunctuation] = useState(true); const [enableNormalization, setEnableNormalization] = useState(true); const [enabled, setEnabled] = useState(true); const [saving, setSaving] = useState(false); useEffect(() => { if (!isOpen) return; if (initialModel) { setName(initialModel.name || ''); setVendor(initialModel.vendor || 'OpenAI Compatible'); setLanguage(initialModel.language || 'zh'); setModelName(initialModel.modelName || 'FunAudioLLM/SenseVoiceSmall'); setBaseUrl(initialModel.baseUrl || 'https://api.siliconflow.cn/v1'); setApiKey(initialModel.apiKey || ''); setHotwords(toHotwordsValue(initialModel.hotwords)); setEnablePunctuation(initialModel.enablePunctuation ?? true); setEnableNormalization(initialModel.enableNormalization ?? true); setEnabled(initialModel.enabled ?? 
true); return; } setName(''); setVendor('OpenAI Compatible'); setLanguage('zh'); setModelName('FunAudioLLM/SenseVoiceSmall'); setBaseUrl('https://api.siliconflow.cn/v1'); setApiKey(''); setHotwords(''); setEnablePunctuation(true); setEnableNormalization(true); setEnabled(true); }, [initialModel, isOpen]); const handleSubmit = async () => { if (!name.trim()) { alert('请填写模型名称'); return; } if (!baseUrl.trim()) { alert('请填写 Base URL'); return; } if (!apiKey.trim()) { alert('请填写 API Key'); return; } try { setSaving(true); await onSubmit({ name: name.trim(), vendor: vendor.trim(), language, modelName: modelName.trim(), baseUrl: baseUrl.trim(), apiKey: apiKey.trim(), hotwords: parseHotwords(hotwords), enablePunctuation, enableNormalization, enabled, }); } catch (error: any) { alert(error?.message || '保存失败'); } finally { setSaving(false); } }; return ( } >
setName(e.target.value)} placeholder="例如: SenseVoice CN" />
setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
); }; const ASRPreviewModal: React.FC<{ isOpen: boolean; onClose: () => void; model: ASRModel | null; }> = ({ isOpen, onClose, model }) => { const [selectedFile, setSelectedFile] = useState(null); const [isDragging, setIsDragging] = useState(false); const [isTranscribing, setIsTranscribing] = useState(false); const [transcript, setTranscript] = useState(''); const [latency, setLatency] = useState(null); const [confidence, setConfidence] = useState(null); const [language, setLanguage] = useState(''); const [isRecording, setIsRecording] = useState(false); const [isProcessingRecording, setIsProcessingRecording] = useState(false); const [inputLevel, setInputLevel] = useState(0); const [isSpeaking, setIsSpeaking] = useState(false); const inputRef = useRef(null); const mediaRecorderRef = useRef(null); const streamRef = useRef(null); const chunksRef = useRef([]); const analyserRef = useRef(null); const visualAudioContextRef = useRef(null); const rafRef = useRef(null); useEffect(() => { if (!isOpen) return; setSelectedFile(null); setTranscript(''); setLatency(null); setConfidence(null); setLanguage(model?.language || ''); setIsTranscribing(false); setIsRecording(false); setIsProcessingRecording(false); setInputLevel(0); setIsSpeaking(false); }, [isOpen, model]); const stopVisualization = () => { if (rafRef.current) { cancelAnimationFrame(rafRef.current); rafRef.current = null; } analyserRef.current = null; if (visualAudioContextRef.current) { visualAudioContextRef.current.close().catch(() => undefined); visualAudioContextRef.current = null; } setInputLevel(0); setIsSpeaking(false); }; const stopCurrentStream = () => { if (streamRef.current) { streamRef.current.getTracks().forEach((track) => track.stop()); streamRef.current = null; } }; useEffect(() => { return () => { stopVisualization(); stopCurrentStream(); }; }, []); useEffect(() => { if (!isOpen) { stopVisualization(); stopCurrentStream(); } }, [isOpen]); const pickFile = (file: File | null) => { if (!file) return; if 
(!file.type.startsWith('audio/')) { alert('仅支持音频文件'); return; } setSelectedFile(file); }; const handleDrop = (event: React.DragEvent) => { event.preventDefault(); setIsDragging(false); const file = event.dataTransfer.files?.[0] || null; pickFile(file); }; const startRecording = async () => { if (!navigator.mediaDevices?.getUserMedia) { alert('当前浏览器不支持麦克风录音'); return; } try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : (MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : ''); const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream); chunksRef.current = []; streamRef.current = stream; mediaRecorderRef.current = recorder; const visualizationContext = createAudioContext(); const source = visualizationContext.createMediaStreamSource(stream); const analyser = visualizationContext.createAnalyser(); analyser.fftSize = 1024; source.connect(analyser); analyserRef.current = analyser; visualAudioContextRef.current = visualizationContext; const timeData = new Uint8Array(analyser.frequencyBinCount); const tick = () => { if (!analyserRef.current) return; analyserRef.current.getByteTimeDomainData(timeData); let sumSquares = 0; for (let i = 0; i < timeData.length; i += 1) { const normalized = (timeData[i] - 128) / 128; sumSquares += normalized * normalized; } const rms = Math.sqrt(sumSquares / timeData.length); const level = Math.min(1, rms * 4); setInputLevel(level); setIsSpeaking(level > 0.08); rafRef.current = requestAnimationFrame(tick); }; tick(); recorder.ondataavailable = (event) => { if (event.data.size > 0) { chunksRef.current.push(event.data); } }; recorder.onstop = async () => { const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' }); setIsProcessingRecording(true); try { let outputFile: File; try { outputFile = await convertRecordedBlobToWav(blob); } catch { 
// Fallback path: if decode/transcode to WAV fails (e.g. the browser cannot
// decode its own recording container), keep the blob in its original
// recorded container (webm) so the preview request can still be attempted.
outputFile = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' }); } setSelectedFile(outputFile); } finally { setIsProcessingRecording(false); stopVisualization(); stopCurrentStream(); } }; recorder.start(250); setIsRecording(true); } catch (error: any) { alert(error?.message || '无法访问麦克风'); } }; const stopRecording = () => { if (!mediaRecorderRef.current) return; mediaRecorderRef.current.stop(); setIsRecording(false); }; const runPreview = async () => { if (!model?.id) return; if (!selectedFile) { alert('请先上传或录制音频'); return; } try { setIsTranscribing(true); const result = await previewASRModel(model.id, selectedFile, { language: language || undefined }); setTranscript(result.transcript || result.message || '无识别内容'); setLatency(result.latency_ms ?? null); setConfidence(result.confidence ?? null); } catch (error: any) { alert(error?.message || '识别失败'); } finally { setIsTranscribing(false); } }; return ( } >
{ e.preventDefault(); setIsDragging(true); }} onDragLeave={() => setIsDragging(false)} onDrop={handleDrop} > pickFile(e.target.files?.[0] || null)} />

拖拽音频文件到这里,或

{selectedFile &&

已选择: {selectedFile.name}

} {isProcessingRecording &&

正在处理录音格式...

}
麦克风测试
{isRecording ? (isSpeaking ? '正在说话' : '等待语音') : '未录音'}
{Array.from({ length: 20 }).map((_, index) => { const threshold = (index + 1) / 20; const active = inputLevel >= threshold; const height = 6 + ((index % 5) * 6); return (
); })}
{!isRecording ? ( ) : ( )}
setLanguage(e.target.value)} placeholder="zh / en / auto" />
识别结果 {latency !== null ? `Latency: ${latency}ms` : ''} {confidence !== null ? ` Confidence: ${confidence.toFixed(3)}` : ''}