708 lines
28 KiB
TypeScript
708 lines
28 KiB
TypeScript
import React, { useEffect, useRef, useState } from 'react';
|
||
import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react';
|
||
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
|
||
import { ASRModel } from '../types';
|
||
import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi';
|
||
|
||
/**
 * Builds a display-safe version of an API key.
 *
 * Keys that are missing or shorter than 8 characters are hidden completely;
 * longer keys keep their first 3 and last 4 characters around a `****` filler.
 *
 * @param key - The raw API key, if any.
 * @returns The masked text to render.
 */
const maskApiKey = (key?: string): string => {
  const FULLY_HIDDEN = '********';
  if (!key || key.length < 8) {
    return FULLY_HIDDEN;
  }
  const head = key.slice(0, 3);
  const tail = key.slice(-4);
  return [head, '****', tail].join('');
};
|
||
|
||
/**
 * Splits free-form hotword input into a clean list.
 *
 * Entries may be separated by a newline, an ASCII comma, a full-width comma
 * (U+FF0C) or an ideographic enumeration comma (U+3001). The last two are what
 * a Chinese input method produces, so without them a whole list typed with
 * full-width punctuation would be stored as one single hotword.
 * Each entry is trimmed and empty entries are dropped.
 *
 * @param value - Raw text from the hotwords field.
 * @returns Trimmed, non-empty hotwords in input order.
 */
const parseHotwords = (value: string): string[] => {
  return value
    .split(/[\n,\uFF0C\u3001]/)
    .map((item) => item.trim())
    .filter(Boolean);
};
|
||
|
||
/**
 * Serializes a hotword list into the comma-separated text shown in the form.
 *
 * @param hotwords - Stored hotwords; a missing list is treated as empty.
 * @returns The entries joined with `", "` (empty string for no entries).
 */
const toHotwordsValue = (hotwords?: string[]): string => {
  const entries = hotwords || [];
  return entries.join(', ');
};
|
||
|
||
/**
 * Creates a Web Audio context, falling back to the `webkit`-prefixed
 * constructor when the standard one is not exposed on `window`.
 *
 * @returns A new `AudioContext`.
 * @throws Error when neither constructor exists. Callers surface
 *   `error.message` to the user, so the failure is reported explicitly instead
 *   of as an opaque "Ctx is not a constructor" TypeError.
 */
const createAudioContext = (): AudioContext => {
  const scope = window as unknown as {
    AudioContext?: typeof AudioContext;
    webkitAudioContext?: typeof AudioContext;
  };
  const ContextCtor = scope.AudioContext ?? scope.webkitAudioContext;
  if (!ContextCtor) {
    throw new Error('当前浏览器不支持 AudioContext');
  }
  return new ContextCtor();
};
|
||
|
||
/**
 * Serializes decoded audio into a 16-bit PCM WAV file.
 *
 * The output is a 44-byte RIFF/WAVE header followed by the samples of every
 * channel interleaved frame by frame, all little-endian. The buffer's own
 * sample rate and channel count are kept.
 *
 * @param audioBuffer - Decoded audio to serialize.
 * @returns A Blob of type `audio/wav`.
 */
const encodeWav = (audioBuffer: AudioBuffer): Blob => {
  const HEADER_BYTES = 44;
  const PCM_FORMAT = 1;
  const BITS_PER_SAMPLE = 16;

  const { numberOfChannels: channels, sampleRate, length: frames } = audioBuffer;
  const planes: Float32Array[] = [];
  for (let c = 0; c < channels; c += 1) {
    planes.push(audioBuffer.getChannelData(c));
  }

  const frameBytes = channels * (BITS_PER_SAMPLE / 8);
  const payloadBytes = frames * frameBytes;
  const bytes = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(bytes);

  // Writes an ASCII chunk tag one byte per character.
  const putTag = (at: number, tag: string): void => {
    tag.split('').forEach((char, i) => out.setUint8(at + i, char.charCodeAt(0)));
  };

  // RIFF container header.
  putTag(0, 'RIFF');
  out.setUint32(4, 36 + payloadBytes, true);
  putTag(8, 'WAVE');

  // "fmt " chunk: 16-byte PCM description.
  putTag(12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, PCM_FORMAT, true);
  out.setUint16(22, channels, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * frameBytes, true);
  out.setUint16(32, frameBytes, true);
  out.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk header.
  putTag(36, 'data');
  out.setUint32(40, payloadBytes, true);

  let cursor = HEADER_BYTES;
  for (let frame = 0; frame < frames; frame += 1) {
    for (const plane of planes) {
      // Clamp to [-1, 1]; negatives scale by 32768, the rest by 32767.
      const clamped = Math.max(-1, Math.min(1, plane[frame]));
      const scale = clamped < 0 ? 0x8000 : 0x7fff;
      out.setInt16(cursor, clamped * scale, true);
      cursor += 2;
    }
  }

  return new Blob([bytes], { type: 'audio/wav' });
};
|
||
|
||
/**
 * Re-encodes a recorded audio blob as a WAV file.
 *
 * The blob is decoded with a temporary audio context and re-serialized by
 * `encodeWav`; the context is closed whether or not decoding succeeds.
 *
 * @param blob - Recorded audio in any format the browser can decode.
 * @returns A `File` of type `audio/wav` named `mic-preview-<timestamp>.wav`.
 */
const convertRecordedBlobToWav = async (blob: Blob): Promise<File> => {
  const decoderContext = createAudioContext();
  try {
    const rawBytes = await blob.arrayBuffer();
    // The decoder is handed its own copy of the bytes.
    const pcm = await decoderContext.decodeAudioData(rawBytes.slice(0));
    const fileName = `mic-preview-${Date.now()}.wav`;
    return new File([encodeWav(pcm)], fileName, { type: 'audio/wav' });
  } finally {
    await decoderContext.close();
  }
};
|
||
|
||
/**
 * Management page for speech-recognition (ASR) model configurations.
 *
 * Loads the model list once on mount and renders it as a table that can be
 * narrowed by a search term (matched against the display name and the model
 * identifier), a vendor and a language. Models whose language is
 * `Multi-lingual` match every language filter. Rows expose preview, edit and
 * delete actions; create/edit and preview are handled by modal dialogs.
 */
export const ASRLibraryPage: React.FC = () => {
  const [models, setModels] = useState<ASRModel[]>([]);
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('OpenAI Compatible');
  const [langFilter, setLangFilter] = useState<string>('all');
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
  const [previewingModel, setPreviewingModel] = useState<ASRModel | null>(null);
  const [isLoading, setIsLoading] = useState(true);

  // Fetches the full list; a failed fetch is logged and shown as an empty table.
  const loadModels = async () => {
    setIsLoading(true);
    try {
      setModels(await fetchASRModels());
    } catch (error) {
      console.error(error);
      setModels([]);
    } finally {
      setIsLoading(false);
    }
  };

  useEffect(() => {
    loadModels();
  }, []);

  // Lower-cased once per render instead of once per row.
  const query = searchTerm.toLowerCase();
  const filteredModels = models.filter((m) => {
    const matchesSearch = m.name.toLowerCase().includes(query) || (m.modelName || '').toLowerCase().includes(query);
    const matchesVendor = m.vendor === vendorFilter;
    const matchesLang = langFilter === 'all' || m.language === langFilter || m.language === 'Multi-lingual';
    return matchesSearch && matchesVendor && matchesLang;
  });

  // Rejections from create/update propagate to the modal, which reports them.
  const handleCreate = async (data: Partial<ASRModel>) => {
    const created = await createASRModel(data);
    setModels((prev) => [created, ...prev]);
    setIsAddModalOpen(false);
  };

  const handleUpdate = async (id: string, data: Partial<ASRModel>) => {
    const updated = await updateASRModel(id, data);
    setModels((prev) => prev.map((m) => (m.id === id ? updated : m)));
    setEditingModel(null);
  };

  // Invoked straight from a click handler, so failures must be handled here:
  // otherwise a failed delete is an unhandled rejection with no user feedback.
  const handleDelete = async (id: string) => {
    if (!confirm('确认删除该语音识别模型吗?')) return;
    try {
      await deleteASRModel(id);
      setModels((prev) => prev.filter((m) => m.id !== id));
    } catch (error: unknown) {
      console.error(error);
      const message = (error as { message?: unknown } | null | undefined)?.message;
      alert(typeof message === 'string' && message ? message : '删除失败');
    }
  };

  return (
    <div className="space-y-6 animate-in fade-in py-4 pb-10">
      <div className="flex items-center justify-between">
        <h1 className="text-2xl font-bold tracking-tight text-white">语音识别</h1>
        <Button onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
          <Plus className="mr-2 h-4 w-4" /> 添加模型
        </Button>
      </div>

      <div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
        <div className="relative col-span-1 md:col-span-2">
          <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
          <Input
            placeholder="搜索模型名称/Model Name..."
            className="pl-9 border-0 bg-white/5"
            value={searchTerm}
            onChange={(e) => setSearchTerm(e.target.value)}
          />
        </div>
        <div className="flex items-center space-x-2">
          <Filter className="h-4 w-4 text-muted-foreground" />
          <select
            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
            value={vendorFilter}
            onChange={(e) => setVendorFilter(e.target.value)}
          >
            <option value="OpenAI Compatible">OpenAI Compatible</option>
          </select>
        </div>
        <div className="flex items-center space-x-2">
          <select
            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
            value={langFilter}
            onChange={(e) => setLangFilter(e.target.value)}
          >
            <option value="all">所有语言</option>
            <option value="zh">中文 (Chinese)</option>
            <option value="en">英文 (English)</option>
            <option value="Multi-lingual">多语言 (Multi-lingual)</option>
          </select>
        </div>
      </div>

      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
        <table className="w-full text-sm">
          <TableHeader>
            <TableRow>
              <TableHead>模型名称</TableHead>
              <TableHead>厂商</TableHead>
              <TableHead>语言</TableHead>
              <TableHead>模型标识</TableHead>
              <TableHead>Base URL</TableHead>
              <TableHead>API Key</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
            {!isLoading && filteredModels.map((model) => (
              <TableRow key={model.id}>
                <TableCell className="font-medium text-white">
                  <div className="flex flex-col">
                    <span className="flex items-center">
                      <Ear className="w-4 h-4 mr-2 text-primary" />
                      {model.name}
                    </span>
                    {model.hotwords && model.hotwords.length > 0 && (
                      <span className="text-xs text-muted-foreground">热词: {model.hotwords.join(', ')}</span>
                    )}
                  </div>
                </TableCell>
                <TableCell><Badge variant="outline">{model.vendor}</Badge></TableCell>
                <TableCell>{model.language}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{model.modelName || '-'}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground max-w-[220px] truncate">{model.baseUrl}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
                <TableCell className="text-right">
                  <Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)}>
                    <Ear className="h-4 w-4" />
                  </Button>
                  <Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
                    <Pencil className="h-4 w-4" />
                  </Button>
                  <Button variant="ghost" size="icon" onClick={() => handleDelete(model.id)} className="text-red-400">
                    <Trash2 className="h-4 w-4" />
                  </Button>
                </TableCell>
              </TableRow>
            ))}
            {!isLoading && filteredModels.length === 0 && (
              <TableRow>
                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
              </TableRow>
            )}
            {isLoading && (
              <TableRow>
                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">加载中...</TableCell>
              </TableRow>
            )}
          </tbody>
        </table>
      </div>

      <ASRModelModal
        isOpen={isAddModalOpen}
        onClose={() => setIsAddModalOpen(false)}
        onSubmit={handleCreate}
      />

      <ASRModelModal
        isOpen={!!editingModel}
        onClose={() => setEditingModel(null)}
        // Guarded rather than asserted: a submit with no model being edited is a no-op.
        onSubmit={(data) => (editingModel ? handleUpdate(editingModel.id, data) : Promise.resolve())}
        initialModel={editingModel || undefined}
      />

      <ASRPreviewModal
        isOpen={!!previewingModel}
        onClose={() => setPreviewingModel(null)}
        model={previewingModel}
      />
    </div>
  );
};
|
||
|
||
/**
 * Dialog for creating or editing an ASR model configuration.
 *
 * Each time the dialog opens, the form is filled from `initialModel` when one
 * is given (edit mode) and from built-in defaults otherwise (create mode).
 * Submitting requires a name, a Base URL and an API key; the trimmed values
 * are passed to `onSubmit`, and an error thrown by it is shown in an alert.
 */
const ASRModelModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSubmit: (model: Partial<ASRModel>) => Promise<void>;
  initialModel?: ASRModel;
}> = ({ isOpen, onClose, onSubmit, initialModel }) => {
  const DEFAULT_VENDOR = 'OpenAI Compatible';
  const DEFAULT_LANGUAGE = 'zh';
  const DEFAULT_MODEL_NAME = 'FunAudioLLM/SenseVoiceSmall';
  const DEFAULT_BASE_URL = 'https://api.siliconflow.cn/v1';

  // Shared Tailwind class strings for the repeated form controls.
  const labelClass = 'text-[10px] font-black text-muted-foreground uppercase tracking-widest block';
  const iconLabelClass = `${labelClass} flex items-center`;
  const selectClass = 'flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card';
  const checkboxLabelClass = 'flex items-center space-x-2 text-xs text-muted-foreground';

  const [name, setName] = useState('');
  const [vendor, setVendor] = useState(DEFAULT_VENDOR);
  const [language, setLanguage] = useState(DEFAULT_LANGUAGE);
  const [modelName, setModelName] = useState(DEFAULT_MODEL_NAME);
  const [baseUrl, setBaseUrl] = useState(DEFAULT_BASE_URL);
  const [apiKey, setApiKey] = useState('');
  const [hotwords, setHotwords] = useState('');
  const [enablePunctuation, setEnablePunctuation] = useState(true);
  const [enableNormalization, setEnableNormalization] = useState(true);
  const [enabled, setEnabled] = useState(true);
  const [saving, setSaving] = useState(false);

  // One code path for both modes: absent or empty model fields fall back to the defaults.
  useEffect(() => {
    if (!isOpen) return;
    setName(initialModel?.name || '');
    setVendor(initialModel?.vendor || DEFAULT_VENDOR);
    setLanguage(initialModel?.language || DEFAULT_LANGUAGE);
    setModelName(initialModel?.modelName || DEFAULT_MODEL_NAME);
    setBaseUrl(initialModel?.baseUrl || DEFAULT_BASE_URL);
    setApiKey(initialModel?.apiKey || '');
    setHotwords(toHotwordsValue(initialModel?.hotwords));
    setEnablePunctuation(initialModel?.enablePunctuation ?? true);
    setEnableNormalization(initialModel?.enableNormalization ?? true);
    setEnabled(initialModel?.enabled ?? true);
  }, [initialModel, isOpen]);

  const handleSubmit = async () => {
    // Required fields in the order they are reported; only the first blank one is reported.
    const requiredFields: Array<[string, string]> = [
      [name, '请填写模型名称'],
      [baseUrl, '请填写 Base URL'],
      [apiKey, '请填写 API Key'],
    ];
    const firstBlank = requiredFields.find(([fieldValue]) => !fieldValue.trim());
    if (firstBlank) {
      alert(firstBlank[1]);
      return;
    }

    const payload: Partial<ASRModel> = {
      name: name.trim(),
      vendor: vendor.trim(),
      language,
      modelName: modelName.trim(),
      baseUrl: baseUrl.trim(),
      apiKey: apiKey.trim(),
      hotwords: parseHotwords(hotwords),
      enablePunctuation,
      enableNormalization,
      enabled,
    };

    setSaving(true);
    try {
      await onSubmit(payload);
    } catch (error) {
      const reason = (error as { message?: string } | null | undefined)?.message;
      alert(reason || '保存失败');
    } finally {
      setSaving(false);
    }
  };

  const isEditing = Boolean(initialModel);
  const submitLabel = saving ? '保存中...' : (isEditing ? '保存修改' : '确认添加');

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={isEditing ? '编辑语音识别模型' : '添加语音识别模型'}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>取消</Button>
          <Button onClick={handleSubmit} disabled={saving}>{submitLabel}</Button>
        </>
      }
    >
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className={labelClass}>模型名称</label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: SenseVoice CN" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={labelClass}>接口类型</label>
            <select className={selectClass} value={vendor} onChange={(e) => setVendor(e.target.value)}>
              <option value="OpenAI Compatible">OpenAI Compatible</option>
            </select>
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Languages className="w-3 h-3 mr-1.5" />语言</label>
            <select className={selectClass} value={language} onChange={(e) => setLanguage(e.target.value)}>
              <option value="zh">中文 (Chinese)</option>
              <option value="en">英文 (English)</option>
              <option value="Multi-lingual">多语言 (Multi-lingual)</option>
            </select>
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>Model Name</label>
          <Input value={modelName} onChange={(e) => setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Server className="w-3 h-3 mr-1.5" />Base URL</label>
            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Key className="w-3 h-3 mr-1.5" />API Key</label>
            <Input value={apiKey} onChange={(e) => setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>热词 (comma separated)</label>
          <Input value={hotwords} onChange={(e) => setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-3 gap-2">
          <label className={checkboxLabelClass}>
            <input type="checkbox" checked={enablePunctuation} onChange={(e) => setEnablePunctuation(e.target.checked)} />
            <span>标点增强</span>
          </label>
          <label className={checkboxLabelClass}>
            <input type="checkbox" checked={enableNormalization} onChange={(e) => setEnableNormalization(e.target.checked)} />
            <span>文本归一化</span>
          </label>
          <label className={checkboxLabelClass}>
            <input type="checkbox" checked={enabled} onChange={(e) => setEnabled(e.target.checked)} />
            <span>启用</span>
          </label>
        </div>
      </div>
    </Dialog>
  );
};
|
||
|
||
/**
 * Dialog for trying out an ASR model on a short audio sample.
 *
 * The sample is either an audio file (picked or dropped) or a microphone
 * recording. A recording is converted to WAV when the browser can decode it
 * and is otherwise kept in the recorder's own format. While recording, a level
 * meter shows the microphone input. "开始识别" sends the sample to
 * `previewASRModel` and displays the transcript, latency and confidence.
 *
 * Microphone resources (stream, recorder, analyser context) are released when
 * a recording ends, when recording setup fails, when the dialog closes and
 * when the component unmounts.
 */
const ASRPreviewModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  model: ASRModel | null;
}> = ({ isOpen, onClose, model }) => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [transcript, setTranscript] = useState('');
  const [latency, setLatency] = useState<number | null>(null);
  const [confidence, setConfidence] = useState<number | null>(null);
  const [language, setLanguage] = useState('');
  const [isRecording, setIsRecording] = useState(false);
  const [isProcessingRecording, setIsProcessingRecording] = useState(false);
  const [inputLevel, setInputLevel] = useState(0);
  const [isSpeaking, setIsSpeaking] = useState(false);

  const inputRef = useRef<HTMLInputElement>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const visualAudioContextRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number | null>(null);

  // Extracts a user-presentable message from an unknown thrown value.
  const describeError = (error: unknown, fallback: string): string => {
    const message = (error as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message ? message : fallback;
  };

  // Start from a clean slate every time the dialog opens (or the model changes while open).
  useEffect(() => {
    if (!isOpen) return;
    setSelectedFile(null);
    setTranscript('');
    setLatency(null);
    setConfidence(null);
    setLanguage(model?.language || '');
    setIsTranscribing(false);
    setIsRecording(false);
    setIsProcessingRecording(false);
    setInputLevel(0);
    setIsSpeaking(false);
  }, [isOpen, model]);

  // Stops the level-meter loop and closes its audio context.
  const stopVisualization = () => {
    if (rafRef.current) {
      cancelAnimationFrame(rafRef.current);
      rafRef.current = null;
    }
    analyserRef.current = null;
    if (visualAudioContextRef.current) {
      visualAudioContextRef.current.close().catch(() => undefined);
      visualAudioContextRef.current = null;
    }
    setInputLevel(0);
    setIsSpeaking(false);
  };

  // Stops every track of the active microphone stream.
  const stopCurrentStream = () => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
    }
  };

  // Abandons the active recorder: its handlers are detached first so that no
  // result from the discarded recording is delivered afterwards.
  const discardRecorder = () => {
    const recorder = mediaRecorderRef.current;
    mediaRecorderRef.current = null;
    chunksRef.current = [];
    if (!recorder) return;
    recorder.ondataavailable = null;
    recorder.onstop = null;
    if (recorder.state !== 'inactive') {
      try {
        recorder.stop();
      } catch {
        // Best effort: the recorder is being abandoned anyway.
      }
    }
  };

  const releaseAudioResources = () => {
    discardRecorder();
    stopVisualization();
    stopCurrentStream();
  };

  // Release everything on unmount. The helpers only touch refs and state
  // setters, so the first render's closure is safe to call later.
  useEffect(() => {
    return () => {
      releaseAudioResources();
    };
  }, []);

  // Release everything when the dialog closes, including a recording in progress.
  useEffect(() => {
    if (!isOpen) {
      releaseAudioResources();
    }
  }, [isOpen]);

  const pickFile = (file: File | null) => {
    if (!file) return;
    if (!file.type.startsWith('audio/')) {
      alert('仅支持音频文件');
      return;
    }
    setSelectedFile(file);
  };

  const handleDrop = (event: React.DragEvent<HTMLDivElement>) => {
    event.preventDefault();
    setIsDragging(false);
    const file = event.dataTransfer.files?.[0] || null;
    pickFile(file);
  };

  const startRecording = async () => {
    if (isRecording || isProcessingRecording) return;
    if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === 'undefined') {
      alert('当前浏览器不支持麦克风录音');
      return;
    }

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Track the stream before anything else can throw, so the catch below
      // can always turn the microphone off again.
      streamRef.current = stream;

      const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
        ? 'audio/webm;codecs=opus'
        : (MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : '');
      const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);

      chunksRef.current = [];
      mediaRecorderRef.current = recorder;

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };

      recorder.onstop = async () => {
        const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' });
        // Capture is over: free the microphone and meter before the conversion.
        stopVisualization();
        stopCurrentStream();
        setIsRecording(false);
        setIsProcessingRecording(true);
        try {
          let outputFile: File;
          try {
            outputFile = await convertRecordedBlobToWav(blob);
          } catch {
            // Decoding failed: keep the recording in the recorder's own format.
            outputFile = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
          }
          // Drop the result if this recording was discarded in the meantime.
          if (mediaRecorderRef.current === recorder) {
            setSelectedFile(outputFile);
          }
        } finally {
          if (mediaRecorderRef.current === recorder) {
            mediaRecorderRef.current = null;
          }
          setIsProcessingRecording(false);
        }
      };

      // Level meter: a separate analyser tapped off the same stream.
      const visualizationContext = createAudioContext();
      visualAudioContextRef.current = visualizationContext;
      const source = visualizationContext.createMediaStreamSource(stream);
      const analyser = visualizationContext.createAnalyser();
      analyser.fftSize = 1024;
      source.connect(analyser);
      analyserRef.current = analyser;

      const timeData = new Uint8Array(analyser.frequencyBinCount);
      const tick = () => {
        if (!analyserRef.current) return;
        analyserRef.current.getByteTimeDomainData(timeData);
        // RMS of the waveform, with byte samples re-centred around 128.
        let sumSquares = 0;
        for (let i = 0; i < timeData.length; i += 1) {
          const normalized = (timeData[i] - 128) / 128;
          sumSquares += normalized * normalized;
        }
        const rms = Math.sqrt(sumSquares / timeData.length);
        const level = Math.min(1, rms * 4);
        setInputLevel(level);
        setIsSpeaking(level > 0.08);
        rafRef.current = requestAnimationFrame(tick);
      };
      tick();

      recorder.start(250);
      setIsRecording(true);
    } catch (error: unknown) {
      // Setup failed part-way: release whatever was acquired.
      releaseAudioResources();
      setIsRecording(false);
      alert(describeError(error, '无法访问麦克风'));
    }
  };

  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    // stop() is only valid on a recorder that has not already stopped.
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }
    setIsRecording(false);
  };

  const runPreview = async () => {
    if (!model?.id) return;
    if (!selectedFile) {
      alert('请先上传或录制音频');
      return;
    }

    try {
      setIsTranscribing(true);
      const result = await previewASRModel(model.id, selectedFile, { language: language || undefined });
      setTranscript(result.transcript || result.message || '无识别内容');
      setLatency(result.latency_ms ?? null);
      setConfidence(result.confidence ?? null);
    } catch (error: unknown) {
      alert(describeError(error, '识别失败'));
    } finally {
      setIsTranscribing(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={`试听识别: ${model?.name || ''}`}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>关闭</Button>
          <Button onClick={runPreview} disabled={isTranscribing || !selectedFile || isProcessingRecording}>
            {isTranscribing ? '识别中...' : '开始识别'}
          </Button>
        </>
      }
    >
      <div className="space-y-4">
        <div
          className={`rounded-lg border-2 border-dashed p-4 transition-colors ${isDragging ? 'border-primary bg-primary/10' : 'border-white/10 bg-white/5'}`}
          onDragOver={(e) => {
            e.preventDefault();
            setIsDragging(true);
          }}
          onDragLeave={() => setIsDragging(false)}
          onDrop={handleDrop}
        >
          <input
            ref={inputRef}
            type="file"
            accept="audio/*"
            className="hidden"
            onChange={(e) => {
              pickFile(e.target.files?.[0] || null);
              // Clear the input so choosing the same file again fires onChange.
              e.target.value = '';
            }}
          />
          <div className="flex flex-col items-center justify-center gap-2 text-sm text-muted-foreground">
            <Upload className="h-6 w-6 text-primary" />
            <p>拖拽音频文件到这里,或</p>
            <Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}>选择文件</Button>
            {selectedFile && <p className="text-primary text-xs">已选择: {selectedFile.name}</p>}
            {isProcessingRecording && <p className="text-yellow-400 text-xs">正在处理录音格式...</p>}
          </div>
        </div>

        <div className="rounded-lg border border-white/10 bg-white/5 p-3 space-y-3">
          <div className="flex items-center justify-between">
            <div className="text-sm text-muted-foreground">麦克风测试</div>
            <div className={`text-xs font-semibold ${isSpeaking ? 'text-green-400' : 'text-muted-foreground'}`}>
              {isRecording ? (isSpeaking ? '正在说话' : '等待语音') : '未录音'}
            </div>
          </div>
          <div className="h-10 rounded-md bg-black/30 border border-white/10 px-2 flex items-end gap-1">
            {Array.from({ length: 20 }).map((_, index) => {
              const threshold = (index + 1) / 20;
              const active = inputLevel >= threshold;
              const height = 6 + ((index % 5) * 6);
              return (
                <div
                  key={`meter-${index}`}
                  className={`w-1 rounded-sm transition-all ${active ? (isSpeaking ? 'bg-green-400' : 'bg-primary') : 'bg-white/10'}`}
                  style={{ height }}
                />
              );
            })}
          </div>
          {!isRecording ? (
            // Disabled while the previous take is still being converted.
            <Button size="sm" variant="outline" onClick={startRecording} disabled={isProcessingRecording}><Mic className="h-4 w-4 mr-1" />开始录音</Button>
          ) : (
            <Button size="sm" variant="destructive" onClick={stopRecording}><Square className="h-4 w-4 mr-1" />停止录音</Button>
          )}
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
            <Globe className="w-3 h-3 mr-1.5" />识别语言 (Optional)
          </label>
          <Input value={language} onChange={(e) => setLanguage(e.target.value)} placeholder="zh / en / auto" />
        </div>

        <div className="rounded-lg border border-primary/20 bg-primary/5 p-3 space-y-2">
          <div className="flex items-center justify-between text-xs text-primary">
            <span>识别结果</span>
            <span>
              {latency !== null ? `Latency: ${latency}ms` : ''}
              {confidence !== null ? ` Confidence: ${confidence.toFixed(3)}` : ''}
            </span>
          </div>
          <textarea
            readOnly
            value={transcript}
            className="flex min-h-[120px] w-full rounded-md border-0 bg-black/20 px-3 py-2 text-sm shadow-sm text-white"
            placeholder="识别结果会显示在这里"
          />
        </div>
      </div>
    </Dialog>
  );
};
|