// AI-VideoAssistant/web/pages/ASRLibrary.tsx

import React, { useEffect, useRef, useState } from 'react';
import { Search, Filter, Plus, Trash2, Key, Server, Ear, Globe, Languages, Pencil, Mic, Square, Upload } from 'lucide-react';
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
import { ASRModel } from '../types';
import { createASRModel, deleteASRModel, fetchASRModels, previewASRModel, updateASRModel } from '../services/backendApi';

/**
 * Builds a display-safe representation of an API key.
 *
 * Missing, empty, or short keys (fewer than 8 characters) are fully hidden
 * behind a fixed placeholder; longer keys keep their first 3 and last 4
 * characters with `****` in between.
 *
 * @param key - The raw API key, if any.
 * @returns The masked string to render.
 */
const maskApiKey = (key?: string) => {
  const hidden = '********';
  // `== null` covers both undefined and null; '' falls under the length check.
  if (key == null || key.length < 8) {
    return hidden;
  }
  const head = key.substring(0, 3);
  const tail = key.substring(key.length - 4);
  return head + '****' + tail;
};

/**
 * Splits free-form hotword input into a clean list of hotwords.
 *
 * Entries may be separated by newlines, ASCII commas, full-width commas
 * (U+FF0C) or ideographic commas (U+3001). The CJK separators are accepted
 * because a Chinese input method types them by default, which previously
 * caused a whole list to be stored as one single hotword.
 *
 * @param value - Raw text typed by the user.
 * @returns Trimmed, non-empty hotwords in their original order.
 */
const parseHotwords = (value: string): string[] => {
  return value
    .split(/[\n,\uFF0C\u3001]/)
    .map((item) => item.trim())
    .filter(Boolean);
};

/**
 * Renders a hotword list as the comma-separated text shown in the form input.
 *
 * @param hotwords - Stored hotwords; may be absent.
 * @returns The entries joined with ", ", or an empty string when there are none.
 */
const toHotwordsValue = (hotwords?: string[]): string => {
  if (!hotwords) {
    return '';
  }
  return hotwords.join(', ');
};

/**
 * Creates a Web Audio context, falling back to the `webkit`-prefixed
 * constructor when the standard one is not exposed on `window`.
 *
 * @returns A new `AudioContext`; the caller is responsible for closing it.
 * @throws Error when neither constructor is available, so callers that show
 *   `error.message` present something meaningful instead of the opaque
 *   "Ctx is not a constructor" TypeError.
 */
const createAudioContext = (): AudioContext => {
  const scope: {
    AudioContext?: typeof AudioContext;
    webkitAudioContext?: typeof AudioContext;
  } = window;
  const Ctx = scope.AudioContext || scope.webkitAudioContext;
  if (!Ctx) {
    throw new Error('当前浏览器不支持 Web Audio API');
  }
  return new Ctx();
};

/**
 * Serialises decoded audio into a 16-bit PCM WAV file.
 *
 * The output is a 44-byte RIFF/WAVE header followed by interleaved,
 * little-endian signed 16-bit samples. The channel count and sample rate are
 * taken from the buffer as-is (no resampling or down-mixing).
 *
 * @param audioBuffer - Decoded audio to serialise.
 * @returns A Blob of type `audio/wav`.
 */
const encodeWav = (audioBuffer: AudioBuffer): Blob => {
  const HEADER_SIZE = 44;
  const PCM_FORMAT = 1;
  const BITS_PER_SAMPLE = 16;
  const bytesPerSample = BITS_PER_SAMPLE / 8;

  const channels = audioBuffer.numberOfChannels;
  const rate = audioBuffer.sampleRate;
  const planes: Float32Array[] = [];
  for (let c = 0; c < channels; c += 1) {
    planes.push(audioBuffer.getChannelData(c));
  }
  const frames = audioBuffer.length;
  const frameBytes = channels * bytesPerSample;
  const payloadBytes = frames * frameBytes;

  const raw = new ArrayBuffer(HEADER_SIZE + payloadBytes);
  const out = new DataView(raw);

  // Writes a four-character ASCII chunk tag one byte at a time.
  const putTag = (at: number, tag: string): void => {
    tag.split('').forEach((ch, k) => out.setUint8(at + k, ch.charCodeAt(0)));
  };

  // RIFF container header; the size field excludes the 8-byte "RIFF<size>" prefix.
  putTag(0, 'RIFF');
  out.setUint32(4, HEADER_SIZE - 8 + payloadBytes, true);
  putTag(8, 'WAVE');

  // "fmt " chunk: 16-byte body describing uncompressed PCM.
  putTag(12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, PCM_FORMAT, true);
  out.setUint16(22, channels, true);
  out.setUint32(24, rate, true);
  out.setUint32(28, rate * frameBytes, true);
  out.setUint16(32, frameBytes, true);
  out.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk header.
  putTag(36, 'data');
  out.setUint32(40, payloadBytes, true);

  // Samples are interleaved frame by frame; each channel is written into its
  // own slot of every frame, so the byte layout does not depend on loop order.
  planes.forEach((plane, c) => {
    for (let frame = 0; frame < frames; frame += 1) {
      const clamped = Math.max(-1, Math.min(1, plane[frame]));
      // Negative values scale by 0x8000 and non-negative by 0x7fff so that
      // both -1 and +1 map onto the ends of the int16 range.
      const scale = clamped < 0 ? 0x8000 : 0x7fff;
      const at = HEADER_SIZE + (frame * channels + c) * bytesPerSample;
      out.setInt16(at, clamped * scale, true);
    }
  });

  return new Blob([raw], { type: 'audio/wav' });
};

/**
 * Converts a recorded audio blob into a WAV file.
 *
 * The blob is decoded with a temporary audio context and re-encoded via
 * `encodeWav`. The context is closed whether or not decoding succeeds.
 *
 * @param blob - Recorded audio in a format the browser can decode.
 * @returns A `File` named `mic-preview-<timestamp>.wav` of type `audio/wav`.
 * @throws Whatever context creation, decoding, or closing the context rejects with.
 */
const convertRecordedBlobToWav = async (blob: Blob): Promise<File> => {
  const ctx = createAudioContext();
  try {
    const recordedBytes = await blob.arrayBuffer();
    // Decode from a copy of the bytes, as the original implementation did.
    const pcm = await ctx.decodeAudioData(recordedBytes.slice(0));
    return new File([encodeWav(pcm)], `mic-preview-${Date.now()}.wav`, { type: 'audio/wav' });
  } finally {
    await ctx.close();
  }
};

/**
 * Management page for speech-recognition (ASR) model configurations.
 *
 * Loads the model list once on mount, lets the user filter it by free-text
 * search, vendor and language, and hosts the create / edit / preview dialogs.
 * Create, update and delete results are applied to the local list directly
 * instead of re-fetching.
 */
export const ASRLibraryPage: React.FC = () => {
  const [models, setModels] = useState<ASRModel[]>([]);
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('OpenAI Compatible');
  const [langFilter, setLangFilter] = useState<string>('all');
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
  const [previewingModel, setPreviewingModel] = useState<ASRModel | null>(null);
  const [isLoading, setIsLoading] = useState(true);

  // Fetches the full model list; a failed fetch is logged and shown as an empty table.
  const loadModels = async () => {
    setIsLoading(true);
    try {
      setModels(await fetchASRModels());
    } catch (error) {
      console.error(error);
      setModels([]);
    } finally {
      setIsLoading(false);
    }
  };

  useEffect(() => {
    loadModels();
  }, []);

  const filteredModels = models.filter((m) => {
    const q = searchTerm.toLowerCase();
    const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q);
    const matchesVendor = m.vendor === vendorFilter;
    // Multi-lingual models are listed under every specific language filter.
    const matchesLang = langFilter === 'all' || m.language === langFilter || m.language === 'Multi-lingual';
    return matchesSearch && matchesVendor && matchesLang;
  });

  // Errors from create/update propagate to the modal, which reports them to the user.
  const handleCreate = async (data: Partial<ASRModel>) => {
    const created = await createASRModel(data);
    setModels((prev) => [created, ...prev]);
    setIsAddModalOpen(false);
  };

  const handleUpdate = async (id: string, data: Partial<ASRModel>) => {
    const updated = await updateASRModel(id, data);
    setModels((prev) => prev.map((m) => (m.id === id ? updated : m)));
    setEditingModel(null);
  };

  // Deletion is triggered straight from the table, so failures must be handled
  // here; otherwise they become an unhandled rejection with no user feedback.
  const handleDelete = async (id: string) => {
    if (!confirm('确认删除该语音识别模型吗？')) return;
    try {
      await deleteASRModel(id);
      setModels((prev) => prev.filter((m) => m.id !== id));
    } catch (error: unknown) {
      console.error(error);
      alert(error instanceof Error && error.message ? error.message : '删除失败');
    }
  };

  return (
    <div className="space-y-6 animate-in fade-in py-4 pb-10">
      <div className="flex items-center justify-between">
        <h1 className="text-2xl font-bold tracking-tight text-white">语音识别</h1>
        <Button onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
          <Plus className="mr-2 h-4 w-4" /> 添加模型
        </Button>
      </div>

      <div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
        <div className="relative col-span-1 md:col-span-2">
          <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
          <Input
            placeholder="搜索模型名称/Model Name..."
            className="pl-9 border-0 bg-white/5"
            value={searchTerm}
            onChange={(e) => setSearchTerm(e.target.value)}
          />
        </div>
        <div className="flex items-center space-x-2">
          <Filter className="h-4 w-4 text-muted-foreground" />
          <select
            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
            value={vendorFilter}
            onChange={(e) => setVendorFilter(e.target.value)}
          >
            <option value="OpenAI Compatible">OpenAI Compatible</option>
          </select>
        </div>
        <div className="flex items-center space-x-2">
          <select
            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
            value={langFilter}
            onChange={(e) => setLangFilter(e.target.value)}
          >
            <option value="all">所有语言</option>
            <option value="zh">中文 (Chinese)</option>
            <option value="en">英文 (English)</option>
            <option value="Multi-lingual">多语言 (Multi-lingual)</option>
          </select>
        </div>
      </div>

      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
        <table className="w-full text-sm">
          <TableHeader>
            <TableRow>
              <TableHead>模型名称</TableHead>
              <TableHead>厂商</TableHead>
              <TableHead>语言</TableHead>
              <TableHead>模型标识</TableHead>
              <TableHead>Base URL</TableHead>
              <TableHead>API Key</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
            {!isLoading && filteredModels.map((model) => (
              <TableRow key={model.id}>
                <TableCell className="font-medium text-white">
                  <div className="flex flex-col">
                    <span className="flex items-center">
                      <Ear className="w-4 h-4 mr-2 text-primary" />
                      {model.name}
                    </span>
                    {model.hotwords && model.hotwords.length > 0 && (
                      <span className="text-xs text-muted-foreground">热词: {model.hotwords.join(', ')}</span>
                    )}
                  </div>
                </TableCell>
                <TableCell><Badge variant="outline">{model.vendor}</Badge></TableCell>
                <TableCell>{model.language}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{model.modelName || '-'}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground max-w-[220px] truncate">{model.baseUrl}</TableCell>
                <TableCell className="font-mono text-xs text-muted-foreground">{maskApiKey(model.apiKey)}</TableCell>
                <TableCell className="text-right">
                  <Button variant="ghost" size="icon" onClick={() => setPreviewingModel(model)}>
                    <Ear className="h-4 w-4" />
                  </Button>
                  <Button variant="ghost" size="icon" onClick={() => setEditingModel(model)}>
                    <Pencil className="h-4 w-4" />
                  </Button>
                  <Button variant="ghost" size="icon" onClick={() => handleDelete(model.id)} className="text-red-400">
                    <Trash2 className="h-4 w-4" />
                  </Button>
                </TableCell>
              </TableRow>
            ))}
            {!isLoading && filteredModels.length === 0 && (
              <TableRow>
                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">暂无语音识别模型</TableCell>
              </TableRow>
            )}
            {isLoading && (
              <TableRow>
                <TableCell colSpan={7} className="text-center py-8 text-muted-foreground">加载中...</TableCell>
              </TableRow>
            )}
          </tbody>
        </table>
      </div>

      <ASRModelModal
        isOpen={isAddModalOpen}
        onClose={() => setIsAddModalOpen(false)}
        onSubmit={handleCreate}
      />

      <ASRModelModal
        isOpen={!!editingModel}
        onClose={() => setEditingModel(null)}
        onSubmit={async (data) => {
          // The edit dialog is only open while a model is selected; guard
          // instead of asserting non-null.
          if (!editingModel) return;
          await handleUpdate(editingModel.id, data);
        }}
        initialModel={editingModel || undefined}
      />

      <ASRPreviewModal
        isOpen={!!previewingModel}
        onClose={() => setPreviewingModel(null)}
        model={previewingModel}
      />
    </div>
  );
};

// Values the form falls back to when creating a model or when the edited
// model leaves the corresponding field empty.
const ASR_MODEL_FORM_DEFAULTS = {
  vendor: 'OpenAI Compatible',
  language: 'zh',
  modelName: 'FunAudioLLM/SenseVoiceSmall',
  baseUrl: 'https://api.siliconflow.cn/v1',
};

/**
 * Dialog with the create / edit form for an ASR model.
 *
 * Every time the dialog opens, the fields are re-seeded from `initialModel`
 * (edit mode) or from the defaults (create mode). Submitting validates the
 * required fields, forwards the trimmed values to `onSubmit`, and shows any
 * error it rejects with via `alert`. Closing the dialog after a successful
 * save is left to the parent.
 */
const ASRModelModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSubmit: (model: Partial<ASRModel>) => Promise<void>;
  initialModel?: ASRModel;
}> = ({ isOpen, onClose, onSubmit, initialModel }) => {
  const [name, setName] = useState('');
  const [vendor, setVendor] = useState(ASR_MODEL_FORM_DEFAULTS.vendor);
  const [language, setLanguage] = useState(ASR_MODEL_FORM_DEFAULTS.language);
  const [modelName, setModelName] = useState(ASR_MODEL_FORM_DEFAULTS.modelName);
  const [baseUrl, setBaseUrl] = useState(ASR_MODEL_FORM_DEFAULTS.baseUrl);
  const [apiKey, setApiKey] = useState('');
  const [hotwords, setHotwords] = useState('');
  const [enablePunctuation, setEnablePunctuation] = useState(true);
  const [enableNormalization, setEnableNormalization] = useState(true);
  const [enabled, setEnabled] = useState(true);
  const [saving, setSaving] = useState(false);

  // Re-seed the form on open. With no model to edit, every optional read
  // below yields undefined and the field falls back to its default.
  useEffect(() => {
    if (!isOpen) return;
    const seed = initialModel;
    setName(seed?.name || '');
    setVendor(seed?.vendor || ASR_MODEL_FORM_DEFAULTS.vendor);
    setLanguage(seed?.language || ASR_MODEL_FORM_DEFAULTS.language);
    setModelName(seed?.modelName || ASR_MODEL_FORM_DEFAULTS.modelName);
    setBaseUrl(seed?.baseUrl || ASR_MODEL_FORM_DEFAULTS.baseUrl);
    setApiKey(seed?.apiKey || '');
    setHotwords(toHotwordsValue(seed?.hotwords));
    setEnablePunctuation(seed?.enablePunctuation ?? true);
    setEnableNormalization(seed?.enableNormalization ?? true);
    setEnabled(seed?.enabled ?? true);
  }, [initialModel, isOpen]);

  const handleSubmit = async () => {
    // Required fields in the order they are checked; the first blank one wins.
    const requiredFields: Array<[string, string]> = [
      [name, '请填写模型名称'],
      [baseUrl, '请填写 Base URL'],
      [apiKey, '请填写 API Key'],
    ];
    const firstMissing = requiredFields.find(([fieldValue]) => !fieldValue.trim());
    if (firstMissing) {
      alert(firstMissing[1]);
      return;
    }

    setSaving(true);
    try {
      const payload: Partial<ASRModel> = {
        name: name.trim(),
        vendor: vendor.trim(),
        language,
        modelName: modelName.trim(),
        baseUrl: baseUrl.trim(),
        apiKey: apiKey.trim(),
        hotwords: parseHotwords(hotwords),
        enablePunctuation,
        enableNormalization,
        enabled,
      };
      await onSubmit(payload);
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setSaving(false);
    }
  };

  const isEditing = Boolean(initialModel);
  const dialogTitle = isEditing ? '编辑语音识别模型' : '添加语音识别模型';
  const idleSubmitLabel = isEditing ? '保存修改' : '确认添加';
  const submitLabel = saving ? '保存中...' : idleSubmitLabel;

  const labelClass = 'text-[10px] font-black text-muted-foreground uppercase tracking-widest block';
  const iconLabelClass = `${labelClass} flex items-center`;
  const selectClass =
    'flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card';

  const languageOptions: Array<[string, string]> = [
    ['zh', '中文 (Chinese)'],
    ['en', '英文 (English)'],
    ['Multi-lingual', '多语言 (Multi-lingual)'],
  ];

  const toggles: Array<{ label: string; checked: boolean; onToggle: (next: boolean) => void }> = [
    { label: '标点增强', checked: enablePunctuation, onToggle: setEnablePunctuation },
    { label: '文本归一化', checked: enableNormalization, onToggle: setEnableNormalization },
    { label: '启用', checked: enabled, onToggle: setEnabled },
  ];

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={dialogTitle}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>取消</Button>
          <Button onClick={handleSubmit} disabled={saving}>{submitLabel}</Button>
        </>
      }
    >
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className={labelClass}>模型名称</label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: SenseVoice CN" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={labelClass}>接口类型</label>
            <select className={selectClass} value={vendor} onChange={(e) => setVendor(e.target.value)}>
              <option value="OpenAI Compatible">OpenAI Compatible</option>
            </select>
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Languages className="w-3 h-3 mr-1.5" />语言</label>
            <select className={selectClass} value={language} onChange={(e) => setLanguage(e.target.value)}>
              {languageOptions.map(([optionValue, optionLabel]) => (
                <option key={optionValue} value={optionValue}>{optionLabel}</option>
              ))}
            </select>
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>Model Name</label>
          <Input value={modelName} onChange={(e) => setModelName(e.target.value)} placeholder="FunAudioLLM/SenseVoiceSmall" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Server className="w-3 h-3 mr-1.5" />Base URL</label>
            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://api.siliconflow.cn/v1" className="font-mono text-xs" />
          </div>
          <div className="space-y-1.5">
            <label className={iconLabelClass}><Key className="w-3 h-3 mr-1.5" />API Key</label>
            <Input value={apiKey} onChange={(e) => setApiKey(e.target.value)} type="password" placeholder="sk-..." className="font-mono text-xs" />
          </div>
        </div>

        <div className="space-y-1.5">
          <label className={labelClass}>热词 (comma separated)</label>
          <Input value={hotwords} onChange={(e) => setHotwords(e.target.value)} placeholder="品牌名, 人名, 专有词" />
        </div>

        <div className="grid grid-cols-1 md:grid-cols-3 gap-2">
          {toggles.map(({ label, checked, onToggle }) => (
            <label key={label} className="flex items-center space-x-2 text-xs text-muted-foreground">
              <input type="checkbox" checked={checked} onChange={(e) => onToggle(e.target.checked)} />
              <span>{label}</span>
            </label>
          ))}
        </div>
      </div>
    </Dialog>
  );
};

/**
 * Dialog for trying out an ASR model on a short audio sample.
 *
 * The sample is either an uploaded / dropped audio file or a microphone
 * recording. Recordings are converted to WAV when the browser can decode
 * them and otherwise kept in the recorder's native format. While recording,
 * a simple RMS level meter is driven from an `AnalyserNode`.
 *
 * Resource handling: the microphone stream, the recorder and the
 * visualisation audio context are released when recording stops, when
 * starting a recording fails part-way, when the dialog closes, and on unmount.
 */
const ASRPreviewModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  model: ASRModel | null;
}> = ({ isOpen, onClose, model }) => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [transcript, setTranscript] = useState('');
  const [latency, setLatency] = useState<number | null>(null);
  const [confidence, setConfidence] = useState<number | null>(null);
  const [language, setLanguage] = useState('');
  const [isRecording, setIsRecording] = useState(false);
  const [isProcessingRecording, setIsProcessingRecording] = useState(false);
  const [inputLevel, setInputLevel] = useState(0);
  const [isSpeaking, setIsSpeaking] = useState(false);

  const inputRef = useRef<HTMLInputElement>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const visualAudioContextRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number | null>(null);

  // Start every session from a clean slate whenever the dialog opens (or the
  // previewed model changes while it is open).
  useEffect(() => {
    if (!isOpen) return;
    setSelectedFile(null);
    setTranscript('');
    setLatency(null);
    setConfidence(null);
    setLanguage(model?.language || '');
    setIsTranscribing(false);
    setIsRecording(false);
    setIsProcessingRecording(false);
    setInputLevel(0);
    setIsSpeaking(false);
  }, [isOpen, model]);

  // Stops the level-meter animation loop and closes its audio context.
  const stopVisualization = () => {
    if (rafRef.current) {
      cancelAnimationFrame(rafRef.current);
      rafRef.current = null;
    }
    analyserRef.current = null;
    if (visualAudioContextRef.current) {
      visualAudioContextRef.current.close().catch(() => undefined);
      visualAudioContextRef.current = null;
    }
    setInputLevel(0);
    setIsSpeaking(false);
  };

  // Releases the microphone by stopping every track of the active stream.
  const stopCurrentStream = () => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
    }
  };

  // Abandons the active recorder without processing its output. Handlers are
  // detached first so stopping does not push a file into a dialog that has
  // been closed or is being torn down.
  const discardRecorder = () => {
    const recorder = mediaRecorderRef.current;
    mediaRecorderRef.current = null;
    chunksRef.current = [];
    if (!recorder) return;
    recorder.ondataavailable = null;
    recorder.onstop = null;
    if (recorder.state !== 'inactive') {
      recorder.stop();
    }
  };

  useEffect(() => {
    return () => {
      discardRecorder();
      stopVisualization();
      stopCurrentStream();
    };
  }, []);

  useEffect(() => {
    if (!isOpen) {
      discardRecorder();
      stopVisualization();
      stopCurrentStream();
    }
  }, [isOpen]);

  // Accepts a candidate file if its MIME type marks it as audio.
  const pickFile = (file: File | null) => {
    if (!file) return;
    if (!file.type.startsWith('audio/')) {
      alert('仅支持音频文件');
      return;
    }
    setSelectedFile(file);
  };

  const handleDrop = (event: React.DragEvent<HTMLDivElement>) => {
    event.preventDefault();
    setIsDragging(false);
    const file = event.dataTransfer.files?.[0] || null;
    pickFile(file);
  };

  const startRecording = async () => {
    if (!navigator.mediaDevices?.getUserMedia) {
      alert('当前浏览器不支持麦克风录音');
      return;
    }

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Track the stream before anything else can throw, so the failure path
      // below is always able to release the microphone.
      streamRef.current = stream;

      const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
        ? 'audio/webm;codecs=opus'
        : (MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : '');
      const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);

      chunksRef.current = [];
      mediaRecorderRef.current = recorder;

      const visualizationContext = createAudioContext();
      // Stored immediately so it is closed if a later setup step fails.
      visualAudioContextRef.current = visualizationContext;
      const source = visualizationContext.createMediaStreamSource(stream);
      const analyser = visualizationContext.createAnalyser();
      analyser.fftSize = 1024;
      source.connect(analyser);
      analyserRef.current = analyser;

      const timeData = new Uint8Array(analyser.frequencyBinCount);
      // Per-frame level meter: RMS of the time-domain signal (bytes are
      // centred on 128), multiplied by 4 and capped at 1.
      const tick = () => {
        if (!analyserRef.current) return;
        analyserRef.current.getByteTimeDomainData(timeData);
        let sumSquares = 0;
        for (let i = 0; i < timeData.length; i += 1) {
          const normalized = (timeData[i] - 128) / 128;
          sumSquares += normalized * normalized;
        }
        const rms = Math.sqrt(sumSquares / timeData.length);
        const level = Math.min(1, rms * 4);
        setInputLevel(level);
        setIsSpeaking(level > 0.08);
        rafRef.current = requestAnimationFrame(tick);
      };
      tick();

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };

      recorder.onstop = async () => {
        // The recorder can also stop by itself (e.g. when its tracks end), so
        // the recording state is reset here and not only in stopRecording.
        if (mediaRecorderRef.current === recorder) {
          mediaRecorderRef.current = null;
        }
        setIsRecording(false);
        // Release the microphone and the meter right away instead of after
        // the conversion, so a recording started meanwhile is not torn down.
        stopVisualization();
        stopCurrentStream();

        const blob = new Blob(chunksRef.current, { type: recorder.mimeType || 'audio/webm' });
        chunksRef.current = [];
        setIsProcessingRecording(true);
        try {
          let outputFile: File;
          try {
            outputFile = await convertRecordedBlobToWav(blob);
          } catch {
            // Conversion failed: fall back to the recorder's own container format.
            outputFile = new File([blob], `mic-preview-${Date.now()}.webm`, { type: blob.type || 'audio/webm' });
          }
          setSelectedFile(outputFile);
        } finally {
          setIsProcessingRecording(false);
        }
      };

      recorder.start(250);
      setIsRecording(true);
    } catch (error: any) {
      // Setup failed part-way: make sure nothing keeps the microphone open.
      discardRecorder();
      stopVisualization();
      stopCurrentStream();
      alert(error?.message || '无法访问麦克风');
    }
  };

  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    // Guard against a recorder that has already stopped on its own.
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }
    setIsRecording(false);
  };

  // Sends the selected sample to the backend and shows transcript, latency and confidence.
  const runPreview = async () => {
    if (!model?.id) return;
    if (!selectedFile) {
      alert('请先上传或录制音频');
      return;
    }

    try {
      setIsTranscribing(true);
      const result = await previewASRModel(model.id, selectedFile, { language: language || undefined });
      setTranscript(result.transcript || result.message || '无识别内容');
      setLatency(result.latency_ms ?? null);
      setConfidence(result.confidence ?? null);
    } catch (error: any) {
      alert(error?.message || '识别失败');
    } finally {
      setIsTranscribing(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={`试听识别: ${model?.name || ''}`}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>关闭</Button>
          <Button onClick={runPreview} disabled={isTranscribing || !selectedFile || isProcessingRecording}>
            {isTranscribing ? '识别中...' : '开始识别'}
          </Button>
        </>
      }
    >
      <div className="space-y-4">
        <div
          className={`rounded-lg border-2 border-dashed p-4 transition-colors ${isDragging ? 'border-primary bg-primary/10' : 'border-white/10 bg-white/5'}`}
          onDragOver={(e) => {
            e.preventDefault();
            setIsDragging(true);
          }}
          onDragLeave={() => setIsDragging(false)}
          onDrop={handleDrop}
        >
          <input
            ref={inputRef}
            type="file"
            accept="audio/*"
            className="hidden"
            onChange={(e) => {
              pickFile(e.target.files?.[0] || null);
              // Clear the input so choosing the same file again fires onChange.
              e.target.value = '';
            }}
          />
          <div className="flex flex-col items-center justify-center gap-2 text-sm text-muted-foreground">
            <Upload className="h-6 w-6 text-primary" />
            <p>拖拽音频文件到这里，或</p>
            <Button variant="outline" size="sm" onClick={() => inputRef.current?.click()}>选择文件</Button>
            {selectedFile && <p className="text-primary text-xs">已选择: {selectedFile.name}</p>}
            {isProcessingRecording && <p className="text-yellow-400 text-xs">正在处理录音格式...</p>}
          </div>
        </div>

        <div className="rounded-lg border border-white/10 bg-white/5 p-3 space-y-3">
          <div className="flex items-center justify-between">
            <div className="text-sm text-muted-foreground">麦克风测试</div>
            <div className={`text-xs font-semibold ${isSpeaking ? 'text-green-400' : 'text-muted-foreground'}`}>
              {isRecording ? (isSpeaking ? '正在说话' : '等待语音') : '未录音'}
            </div>
          </div>
          <div className="h-10 rounded-md bg-black/30 border border-white/10 px-2 flex items-end gap-1">
            {Array.from({ length: 20 }).map((_, index) => {
              // A bar lights up once the input level reaches its share of the 0..1 range.
              const threshold = (index + 1) / 20;
              const active = inputLevel >= threshold;
              const height = 6 + ((index % 5) * 6);
              return (
                <div
                  key={`meter-${index}`}
                  className={`w-1 rounded-sm transition-all ${active ? (isSpeaking ? 'bg-green-400' : 'bg-primary') : 'bg-white/10'}`}
                  style={{ height }}
                />
              );
            })}
          </div>
          {!isRecording ? (
            <Button size="sm" variant="outline" onClick={startRecording}><Mic className="h-4 w-4 mr-1" />开始录音</Button>
          ) : (
            <Button size="sm" variant="destructive" onClick={stopRecording}><Square className="h-4 w-4 mr-1" />停止录音</Button>
          )}
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block flex items-center">
            <Globe className="w-3 h-3 mr-1.5" />识别语言 (Optional)
          </label>
          <Input value={language} onChange={(e) => setLanguage(e.target.value)} placeholder="zh / en / auto" />
        </div>

        <div className="rounded-lg border border-primary/20 bg-primary/5 p-3 space-y-2">
          <div className="flex items-center justify-between text-xs text-primary">
            <span>识别结果</span>
            <span>
              {latency !== null ? `Latency: ${latency}ms` : ''}
              {confidence !== null ? `  Confidence: ${confidence.toFixed(3)}` : ''}
            </span>
          </div>
          <textarea
            readOnly
            value={transcript}
            className="flex min-h-[120px] w-full rounded-md border-0 bg-black/20 px-3 py-2 text-sm shadow-sm text-white"
            placeholder="识别结果会显示在这里"
          />
        </div>
      </div>
    </Dialog>
  );
};