Update ASR preview to support microphone input
This commit is contained in:
@@ -19,6 +19,69 @@ const parseHotwords = (value: string): string[] => {
|
||||
|
||||
/**
 * Formats a hotword list as the comma-separated text shown in an input field.
 *
 * @param hotwords - Hotwords to display; may be omitted.
 * @returns The entries joined with ", ", or an empty string when no list is given.
 */
const toHotwordsValue = (hotwords?: string[]): string => {
  if (!hotwords) {
    return '';
  }
  return hotwords.join(', ');
};
|
||||
|
||||
/**
 * Creates a Web Audio context, preferring the standard `AudioContext`
 * constructor and falling back to the WebKit-prefixed one.
 *
 * @returns A newly constructed audio context.
 * @throws Error when `window` exposes neither `AudioContext` nor
 *   `webkitAudioContext`, instead of the opaque "Ctx is not a constructor".
 */
const createAudioContext = (): AudioContext => {
  // A declared shape (rather than `any`) keeps the constructor lookup type-checked.
  const audioGlobals: {
    AudioContext?: typeof AudioContext;
    webkitAudioContext?: typeof AudioContext;
  } = window;

  const Ctx = audioGlobals.AudioContext ?? audioGlobals.webkitAudioContext;
  if (!Ctx) {
    throw new Error('Web Audio API is not supported in this browser');
  }
  return new Ctx();
};
|
||||
|
||||
/**
 * Encodes decoded audio as an uncompressed 16-bit PCM WAV (RIFF) blob.
 *
 * All channels of the buffer are kept and interleaved frame by frame, and the
 * buffer's own sample rate is written to the header (no resampling).
 *
 * @param audioBuffer - Decoded audio whose channel data holds float samples.
 * @returns A blob of type `audio/wav` containing a 44-byte header followed by
 *   little-endian signed 16-bit samples.
 */
const encodeWav = (audioBuffer: AudioBuffer): Blob => {
  const WAV_HEADER_BYTES = 44;
  const PCM_FORMAT = 1; // WAVE_FORMAT_PCM (uncompressed integer samples)
  const BITS_PER_SAMPLE = 16;
  const BYTES_PER_SAMPLE = BITS_PER_SAMPLE / 8;

  const { numberOfChannels, sampleRate, length: frameCount } = audioBuffer;
  const channels = Array.from({ length: numberOfChannels }, (_, ch) => audioBuffer.getChannelData(ch));

  const blockAlign = numberOfChannels * BYTES_PER_SAMPLE; // bytes per frame
  const byteRate = sampleRate * blockAlign;
  const dataSize = frameCount * blockAlign;

  const buffer = new ArrayBuffer(WAV_HEADER_BYTES + dataSize);
  const view = new DataView(buffer);

  // Writes an ASCII tag (chunk id) one byte per character.
  const writeAscii = (offset: number, text: string): void => {
    for (let i = 0; i < text.length; i += 1) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };

  // RIFF container header; the size field excludes the 8-byte "RIFF"+size prefix.
  writeAscii(0, 'RIFF');
  view.setUint32(4, WAV_HEADER_BYTES - 8 + dataSize, true);
  writeAscii(8, 'WAVE');

  // "fmt " chunk: 16 bytes describing the PCM layout.
  writeAscii(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, PCM_FORMAT, true);
  view.setUint16(22, numberOfChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk: interleaved samples follow immediately.
  writeAscii(36, 'data');
  view.setUint32(40, dataSize, true);

  let offset = WAV_HEADER_BYTES;
  for (let frame = 0; frame < frameCount; frame += 1) {
    for (const samples of channels) {
      // Clamp to [-1, 1] so out-of-range floats cannot wrap around in int16.
      const clamped = Math.max(-1, Math.min(1, samples[frame] ?? 0));
      // Asymmetric scale maps -1 to -32768 and +1 to 32767.
      const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
      // Round to nearest: setInt16 alone would truncate the fraction toward zero.
      view.setInt16(offset, Math.round(scaled), true);
      offset += BYTES_PER_SAMPLE;
    }
  }

  return new Blob([buffer], { type: 'audio/wav' });
};
|
||||
|
||||
/**
 * Decodes a recorded audio blob and re-encodes it as a WAV file.
 *
 * @param blob - Encoded audio to convert (for example a MediaRecorder recording).
 * @returns A `File` of type `audio/wav` named `mic-preview-<timestamp>.wav`.
 * @throws Rejects when the audio context cannot be created or the blob cannot
 *   be read or decoded.
 */
const convertRecordedBlobToWav = async (blob: Blob): Promise<File> => {
  const audioContext = createAudioContext();
  try {
    // The bytes are handed straight to the decoder and never reused here, so
    // no defensive copy of the whole recording is made.
    const encodedBytes = await blob.arrayBuffer();
    const decoded = await audioContext.decodeAudioData(encodedBytes);
    const wavBlob = encodeWav(decoded);
    return new File([wavBlob], `mic-preview-${Date.now()}.wav`, { type: 'audio/wav' });
  } finally {
    // Best-effort close: a failure to release the context must not discard a
    // successfully built file or replace the original decode error.
    await audioContext.close().catch(() => undefined);
  }
};
|
||||
|
||||
export const ASRLibraryPage: React.FC = () => {
|
||||
const [models, setModels] = useState<ASRModel[]>([]);
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
@@ -378,11 +441,17 @@ const ASRPreviewModal: React.FC<{
|
||||
const [confidence, setConfidence] = useState<number | null>(null);
|
||||
const [language, setLanguage] = useState('');
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [isProcessingRecording, setIsProcessingRecording] = useState(false);
|
||||
const [inputLevel, setInputLevel] = useState(0);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const chunksRef = useRef<Blob[]>([]);
|
||||
const analyserRef = useRef<AnalyserNode | null>(null);
|
||||
const visualAudioContextRef = useRef<AudioContext | null>(null);
|
||||
const rafRef = useRef<number | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isOpen) return;
|
||||
@@ -393,16 +462,46 @@ const ASRPreviewModal: React.FC<{
|
||||
setLanguage(model?.language || '');
|
||||
setIsTranscribing(false);
|
||||
setIsRecording(false);
|
||||
setIsProcessingRecording(false);
|
||||
setInputLevel(0);
|
||||
setIsSpeaking(false);
|
||||
}, [isOpen, model]);
|
||||
|
||||
// Tears down the input-level meter: cancels the pending animation frame,
// drops the analyser, closes the visualization audio context (ignoring close
// failures), and resets the level/speaking indicators.
const stopVisualization = () => {
  const pendingFrame = rafRef.current;
  if (pendingFrame) {
    cancelAnimationFrame(pendingFrame);
    rafRef.current = null;
  }

  analyserRef.current = null;

  const meterContext = visualAudioContextRef.current;
  if (meterContext) {
    meterContext.close().catch(() => undefined);
    visualAudioContextRef.current = null;
  }

  setInputLevel(0);
  setIsSpeaking(false);
};
|
||||
|
||||
// Stops every track of the stream held in streamRef and clears the ref.
// Does nothing when no stream is held.
const stopCurrentStream = () => {
  const activeStream = streamRef.current;
  if (!activeStream) {
    return;
  }
  for (const track of activeStream.getTracks()) {
    track.stop();
  }
  streamRef.current = null;
};
|
||||
|
||||
// Unmount cleanup: tear down the level meter and stop the microphone stream.
// stopCurrentStream() already stops every track and clears the ref, so no
// separate inline track-stopping loop is needed here.
useEffect(() => {
  return () => {
    stopVisualization();
    stopCurrentStream();
  };
}, []);
|
||||
|
||||
// Whenever isOpen is false, tear down the level meter and stop the microphone stream.
useEffect(() => {
  if (isOpen) {
    return;
  }
  stopVisualization();
  stopCurrentStream();
}, [isOpen]);
|
||||
|
||||
const pickFile = (file: File | null) => {
|
||||
if (!file) return;
|
||||
if (!file.type.startsWith('audio/')) {
|
||||
@@ -427,29 +526,65 @@ const ASRPreviewModal: React.FC<{
|
||||
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
const recorder = new MediaRecorder(stream);
|
||||
const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
|
||||
? 'audio/webm;codecs=opus'
|
||||
: (MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : '');
|
||||
const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
|
||||
|
||||
chunksRef.current = [];
|
||||
streamRef.current = stream;
|
||||
mediaRecorderRef.current = recorder;
|
||||
|
||||
const visualizationContext = createAudioContext();
|
||||
const source = visualizationContext.createMediaStreamSource(stream);
|
||||
const analyser = visualizationContext.createAnalyser();
|
||||
analyser.fftSize = 1024;
|
||||
source.connect(analyser);
|
||||
analyserRef.current = analyser;
|
||||
visualAudioContextRef.current = visualizationContext;
|
||||
|
||||
const timeData = new Uint8Array(analyser.frequencyBinCount);
|
||||
const tick = () => {
|
||||
if (!analyserRef.current) return;
|
||||
analyserRef.current.getByteTimeDomainData(timeData);
|
||||
let sumSquares = 0;
|
||||
for (let i = 0; i < timeData.length; i += 1) {
|
||||
const normalized = (timeData[i] - 128) / 128;
|
||||
sumSquares += normalized * normalized;
|
||||
}
|
||||
const rms = Math.sqrt(sumSquares / timeData.length);
|
||||
const level = Math.min(1, rms * 4);
|
||||
setInputLevel(level);
|
||||
setIsSpeaking(level > 0.08);
|
||||
rafRef.current = requestAnimationFrame(tick);
|
||||
};
|
||||
tick();
|
||||
|
||||
recorder.ondataavailable = (event) => {
|
||||
if (event.data.size > 0) {
|
||||
chunksRef.current.push(event.data);
|
||||
}
|
||||
};
|
||||
|
||||
recorder.onstop = () => {
|
||||
recorder.onstop = async () => {
|
||||
const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' });
|
||||
const file = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
|
||||
setSelectedFile(file);
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((track) => track.stop());
|
||||
streamRef.current = null;
|
||||
setIsProcessingRecording(true);
|
||||
try {
|
||||
let outputFile: File;
|
||||
try {
|
||||
outputFile = await convertRecordedBlobToWav(blob);
|
||||
} catch {
|
||||
outputFile = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
|
||||
}
|
||||
setSelectedFile(outputFile);
|
||||
} finally {
|
||||
setIsProcessingRecording(false);
|
||||
stopVisualization();
|
||||
stopCurrentStream();
|
||||
}
|
||||
};
|
||||
|
||||
recorder.start();
|
||||
recorder.start(250);
|
||||
setIsRecording(true);
|
||||
} catch (error: any) {
|
||||
alert(error?.message || '无法访问麦克风');
|
||||
@@ -490,7 +625,7 @@ const ASRPreviewModal: React.FC<{
|
||||
footer={
|
||||
<>
|
||||
<Button variant="ghost" onClick={onClose}>关闭</Button>
|
||||
<Button onClick={runPreview} disabled={isTranscribing || !selectedFile}>
|
||||
<Button onClick={runPreview} disabled={isTranscribing || !selectedFile || isProcessingRecording}>
|
||||
{isTranscribing ? '识别中...' : '开始识别'}
|
||||
</Button>
|
||||
</>
|
||||
@@ -518,11 +653,31 @@ const ASRPreviewModal: React.FC<{
|
||||
<p>拖拽音频文件到这里,或</p>
|
||||
<Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}>选择文件</Button>
|
||||
{selectedFile && <p className="text-primary text-xs">已选择: {selectedFile.name}</p>}
|
||||
{isProcessingRecording && <p className="text-yellow-400 text-xs">正在处理录音格式...</p>}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between rounded-lg border border-white/10 bg-white/5 p-3">
|
||||
<div className="text-sm text-muted-foreground">麦克风测试</div>
|
||||
<div className="rounded-lg border border-white/10 bg-white/5 p-3 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="text-sm text-muted-foreground">麦克风测试</div>
|
||||
<div className={`text-xs font-semibold ${isSpeaking ? 'text-green-400' : 'text-muted-foreground'}`}>
|
||||
{isRecording ? (isSpeaking ? '正在说话' : '等待语音') : '未录音'}
|
||||
</div>
|
||||
</div>
|
||||
<div className="h-10 rounded-md bg-black/30 border border-white/10 px-2 flex items-end gap-1">
|
||||
{Array.from({ length: 20 }).map((_, index) => {
|
||||
const threshold = (index + 1) / 20;
|
||||
const active = inputLevel >= threshold;
|
||||
const height = 6 + ((index % 5) * 6);
|
||||
return (
|
||||
<div
|
||||
key={`meter-${index}`}
|
||||
className={`w-1 rounded-sm transition-all ${active ? (isSpeaking ? 'bg-green-400' : 'bg-primary') : 'bg-white/10'}`}
|
||||
style={{ height }}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
{!isRecording ? (
|
||||
<Button size="sm" variant="outline" onClick={startRecording}><Mic className="h-4 w-4 mr-1" />开始录音</Button>
|
||||
) : (
|
||||
|
||||
Reference in New Issue
Block a user