// AI-VideoAssistant/web/pages/VoiceLibrary.tsx

import React, { useEffect, useMemo, useState, useRef } from 'react';
import { Search, Mic2, Play, Pause, Upload, Filter, Plus, Volume2, Pencil, Trash2 } from 'lucide-react';
import { Button, Input, Select, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge, LibraryPageShell, TableStatusRow, LibraryActionCell } from '../components/UI';
import { Voice } from '../types';
import { previewVoice } from '../services/backendApi';
import {
  useCreateVoiceMutation,
  useDeleteVoiceMutation,
  useUpdateVoiceMutation,
  useVoicesQuery,
} from '../services/queries';

// Defaults for the "OpenAI Compatible" vendor: the model and voice key that the
// add/edit voice form is pre-filled with and falls back to.
const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
const OPENAI_COMPATIBLE_DEFAULT_VOICE = 'FunAudioLLM/CosyVoice2-0.5B:anna';
// Defaults for the "DashScope" vendor, used as placeholders in the form and as
// fallbacks when the corresponding field is empty on save.
const DASHSCOPE_DEFAULT_MODEL = 'qwen3-tts-flash-realtime';
const DASHSCOPE_DEFAULT_VOICE = 'Cherry';
const DASHSCOPE_DEFAULT_BASE_URL = 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime';

// Vendors that can be selected in the add/edit voice form.
type VoiceVendor = 'OpenAI Compatible' | 'DashScope';

/**
 * Derives the voice key for an OpenAI-compatible voice.
 *
 * @param rawId - Candidate voice id; surrounding whitespace is ignored.
 * @param model - Model name used as the key prefix.
 * @returns `<model>:anna` when the id is blank, the trimmed id unchanged when it
 *          already contains a `:`, otherwise `<model>:<id>`.
 */
const buildOpenAICompatibleVoiceKey = (rawId: string, model: string): string => {
  const trimmedId = (rawId || '').trim();

  if (trimmedId.length === 0) {
    return `${model}:anna`;
  }

  // An id that already carries a "prefix:voice" shape is used verbatim.
  if (trimmedId.indexOf(':') !== -1) {
    return trimmedId;
  }

  return `${model}:${trimmedId}`;
};

/**
 * Voice library page.
 *
 * Lists the voices returned by `useVoicesQuery`, lets the user filter them by
 * name / vendor / gender / language, preview a voice through `previewVoice`,
 * and create, edit, clone or delete voices through the corresponding mutations.
 */
export const VoiceLibraryPage: React.FC = () => {
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('all');
  const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
  const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');

  const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
  const [isCloneModalOpen, setIsCloneModalOpen] = useState(false);
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingVoice, setEditingVoice] = useState<Voice | null>(null);
  const [playLoadingId, setPlayLoadingId] = useState<string | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Monotonic counter identifying the most recent preview request, so that a
  // slower, superseded request cannot overwrite the state of a newer one.
  const playRequestRef = useRef(0);

  const voicesQuery = useVoicesQuery();
  // Memoised so the fallback `[]` keeps a stable identity while data is absent;
  // otherwise every render would invalidate the memos below.
  const voices = useMemo(() => voicesQuery.data || [], [voicesQuery.data]);
  const isLoading = voicesQuery.isLoading;
  const createVoiceMutation = useCreateVoiceMutation();
  const updateVoiceMutation = useUpdateVoiceMutation();
  const deleteVoiceMutation = useDeleteVoiceMutation();

  const vendorOptions = useMemo(
    () => Array.from(new Set(voices.map((v) => String(v.vendor || '').trim()).filter(Boolean))).sort(),
    [voices]
  );

  const filteredVoices = useMemo(() => {
    const needle = searchTerm.toLowerCase();
    return voices.filter((voice) => {
      // Tolerate a missing name instead of throwing during render.
      const matchesSearch = String(voice.name || '').toLowerCase().includes(needle);
      // Vendor options are built from trimmed values, so compare trimmed values too.
      const matchesVendor = vendorFilter === 'all' || String(voice.vendor || '').trim() === vendorFilter;
      const matchesGender = genderFilter === 'all' || voice.gender === genderFilter;
      const matchesLang = langFilter === 'all' || voice.language === langFilter;
      return matchesSearch && matchesVendor && matchesGender && matchesLang;
    });
  }, [voices, searchTerm, vendorFilter, genderFilter, langFilter]);

  /** Stops the current preview (if any) and detaches its handlers. */
  const stopAudio = () => {
    const current = audioRef.current;
    if (current) {
      current.onended = null;
      current.onerror = null;
      current.pause();
    }
    audioRef.current = null;
  };

  // Stop playback and invalidate any in-flight preview when the page unmounts,
  // so audio does not keep playing after navigation.
  React.useEffect(() => () => {
    playRequestRef.current += 1;
    const current = audioRef.current;
    if (current) {
      current.onended = null;
      current.onerror = null;
      current.pause();
    }
    audioRef.current = null;
  }, []);

  /**
   * Toggles the preview of a voice: stops it when it is the one currently
   * playing, otherwise requests a preview clip and plays it.
   */
  const handlePlayToggle = async (voice: Voice) => {
    if (playingVoiceId === voice.id && audioRef.current) {
      stopAudio();
      setPlayingVoiceId(null);
      return;
    }

    const requestId = ++playRequestRef.current;
    const isSuperseded = () => requestId !== playRequestRef.current;
    // Set by `onerror` so the rejection of `play()` for the same failure does
    // not raise a second alert.
    let errorAlreadyShown = false;

    try {
      setPlayLoadingId(voice.id);
      const audioUrl = await previewVoice(
        voice.id,
        voice.language === 'en' ? 'Hello, this is a voice preview.' : '你好，这是一段语音试听。',
        voice.speed
      );

      // A newer preview was requested while this one was loading.
      if (isSuperseded()) return;

      stopAudio();

      const audio = new Audio(audioUrl);
      audio.onended = () => {
        if (audioRef.current === audio) setPlayingVoiceId(null);
      };
      audio.onerror = () => {
        if (audioRef.current !== audio) return;
        errorAlreadyShown = true;
        setPlayingVoiceId(null);
        alert('试听失败，请检查该声音的 API Key / Base URL。');
      };

      audioRef.current = audio;
      setPlayingVoiceId(voice.id);
      await audio.play();
    } catch (error: any) {
      // A superseded request fails quietly (e.g. `play()` is rejected because
      // the element was paused in favour of a newer preview).
      if (isSuperseded()) return;
      if (!errorAlreadyShown) alert(error?.message || '试听失败');
      setPlayingVoiceId(null);
    } finally {
      // Only the latest request owns the loading indicator.
      if (!isSuperseded()) setPlayLoadingId(null);
    }
  };

  /** Persists a newly created (or cloned) voice and closes both creation dialogs. */
  const handleAddSuccess = async (newVoice: Voice) => {
    await createVoiceMutation.mutateAsync(newVoice);
    setIsAddModalOpen(false);
    setIsCloneModalOpen(false);
  };

  /** Persists edits to an existing voice and closes the edit dialog. */
  const handleUpdateSuccess = async (id: string, data: Voice) => {
    await updateVoiceMutation.mutateAsync({ id, data });
    setEditingVoice(null);
  };

  /** Deletes a voice after confirmation; failures are reported instead of left unhandled. */
  const handleDelete = async (id: string) => {
    if (!confirm('确认删除该声音吗？该操作不可恢复。')) return;

    // Do not keep previewing a voice the user has just chosen to delete.
    if (playingVoiceId === id) {
      stopAudio();
      setPlayingVoiceId(null);
    }

    try {
      await deleteVoiceMutation.mutateAsync(id);
    } catch (error: any) {
      alert(error?.message || '删除失败');
    }
  };

  return (
    <LibraryPageShell
      title="声音资源"
      primaryAction={(
        <div className="flex space-x-3">
          <Button variant="primary" onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
            <Plus className="mr-2 h-4 w-4" /> 添加声音
          </Button>
          <Button variant="primary" onClick={() => setIsCloneModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
            <Mic2 className="mr-2 h-4 w-4" /> 克隆声音
          </Button>
        </div>
      )}
      filterBar={(
        <>
        <div className="relative">
          <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
          <Input
            placeholder="搜索名称..."
            className="pl-9 border-0 bg-white/5"
            value={searchTerm}
            onChange={(e) => setSearchTerm(e.target.value)}
          />
        </div>
        <div className="flex items-center space-x-2">
          <Filter className="h-4 w-4 text-muted-foreground" />
          <Select
            value={vendorFilter}
            onChange={(e) => setVendorFilter(e.target.value)}
          >
            <option value="all">所有厂商</option>
            {vendorOptions.map((vendor) => (
              <option key={vendor} value={vendor}>{vendor}</option>
            ))}
          </Select>
        </div>
        <div className="flex items-center space-x-2">
          <Select
            value={genderFilter}
            onChange={(e) => setGenderFilter(e.target.value as typeof genderFilter)}
          >
            <option value="all">所有性别</option>
            <option value="Male">男 (Male)</option>
            <option value="Female">女 (Female)</option>
          </Select>
        </div>
        <div className="flex items-center space-x-2">
          <Select
            value={langFilter}
            onChange={(e) => setLangFilter(e.target.value as typeof langFilter)}
          >
            <option value="all">所有语言</option>
            <option value="zh">中文 (Chinese)</option>
            <option value="en">英文 (English)</option>
          </Select>
        </div>
        </>
      )}
    >

      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
        <table className="w-full text-sm">
          <TableHeader>
            <TableRow>
              <TableHead>声音名称</TableHead>
              <TableHead>厂商</TableHead>
              <TableHead>性别</TableHead>
              <TableHead>语言</TableHead>
              <TableHead className="text-right">试听</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
            {!isLoading && filteredVoices.map((voice) => (
              <TableRow key={voice.id}>
                <TableCell className="font-medium">
                  <div className="flex flex-col">
                    <span className="flex items-center text-white">{voice.name}</span>
                    {voice.description && <span className="text-xs text-muted-foreground">{voice.description}</span>}
                  </div>
                </TableCell>
                <TableCell>
                  <Badge variant="outline">{voice.vendor}</Badge>
                </TableCell>
                <TableCell className="text-muted-foreground">{voice.gender === 'Male' ? '男' : '女'}</TableCell>
                <TableCell className="text-muted-foreground">{voice.language === 'zh' ? '中文' : 'English'}</TableCell>
                <TableCell className="text-right">
                  <Button
                    variant="ghost"
                    size="icon"
                    onClick={() => handlePlayToggle(voice)}
                    disabled={playLoadingId === voice.id}
                    className={playingVoiceId === voice.id ? 'text-primary animate-pulse' : ''}
                  >
                    {playingVoiceId === voice.id ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
                  </Button>
                </TableCell>
                <LibraryActionCell
                  editAction={(
                    <Button variant="ghost" size="icon" onClick={() => setEditingVoice(voice)} title="编辑声音">
                      <Pencil className="h-4 w-4" />
                    </Button>
                  )}
                  deleteAction={(
                    <Button variant="ghost" size="icon" onClick={() => handleDelete(voice.id)} className="text-muted-foreground hover:text-destructive transition-colors" title="删除声音">
                      <Trash2 className="h-4 w-4" />
                    </Button>
                  )}
                />
              </TableRow>
            ))}
            {!isLoading && filteredVoices.length === 0 && <TableStatusRow colSpan={6} text="暂无声音数据" />}
            {isLoading && <TableStatusRow colSpan={6} text="加载中..." />}
          </tbody>
        </table>
      </div>

      <AddVoiceModal isOpen={isAddModalOpen} onClose={() => setIsAddModalOpen(false)} onSuccess={handleAddSuccess} />

      <AddVoiceModal
        isOpen={!!editingVoice}
        onClose={() => setEditingVoice(null)}
        onSuccess={async (voice) => {
          // Guard instead of asserting: the dialog may already have been dismissed.
          if (!editingVoice) return;
          await handleUpdateSuccess(editingVoice.id, voice);
        }}
        initialVoice={editingVoice || undefined}
      />

      <CloneVoiceModal isOpen={isCloneModalOpen} onClose={() => setIsCloneModalOpen(false)} onSuccess={handleAddSuccess} />
    </LibraryPageShell>
  );
};

/**
 * Dialog for adding a new voice or editing an existing one.
 *
 * Props:
 * - `isOpen` / `onClose`: dialog visibility and dismissal callback.
 * - `onSuccess`: receives the assembled `Voice`; a rejection is reported to the
 *   user and the dialog stays as it is.
 * - `initialVoice`: when provided, the dialog is in edit mode and the form is
 *   populated from it; otherwise the form starts from the OpenAI-compatible defaults.
 */
const AddVoiceModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSuccess: (voice: Voice) => Promise<void>;
  initialVoice?: Voice;
}> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
  const [vendor, setVendor] = useState<VoiceVendor>('OpenAI Compatible');
  const [name, setName] = useState('');

  const [openaiCompatibleModel, setOpenaiCompatibleModel] = useState(OPENAI_COMPATIBLE_DEFAULT_MODEL);
  const [sfVoiceId, setSfVoiceId] = useState(OPENAI_COMPATIBLE_DEFAULT_VOICE);
  const [sfSpeed, setSfSpeed] = useState(1);
  const [sfGain, setSfGain] = useState(0);
  const [sfPitch, setSfPitch] = useState(0);

  const [gender, setGender] = useState('Female');
  const [language, setLanguage] = useState('zh');
  const [description, setDescription] = useState('');
  const [apiKey, setApiKey] = useState('');
  const [baseUrl, setBaseUrl] = useState('');

  const [testInput, setTestInput] = useState('你好，正在测试语音合成效果。');
  const [isAuditioning, setIsAuditioning] = useState(false);
  const [isSaving, setIsSaving] = useState(false);
  const testAudioRef = useRef<HTMLAudioElement | null>(null);
  // Bumped whenever the dialog closes, so an audition that is still loading
  // does not start playing (or alert) after the dialog is gone.
  const auditionTokenRef = useRef(0);

  // (Re)initialise the form every time the dialog opens or the edited voice changes.
  useEffect(() => {
    if (!isOpen) return;

    if (!initialVoice) {
      setVendor('OpenAI Compatible');
      setName('');
      setGender('Female');
      setLanguage('zh');
      setDescription('');
      setOpenaiCompatibleModel(OPENAI_COMPATIBLE_DEFAULT_MODEL);
      setSfVoiceId(OPENAI_COMPATIBLE_DEFAULT_VOICE);
      setSfSpeed(1);
      setSfGain(0);
      setSfPitch(0);
      setApiKey('');
      setBaseUrl('');
      setTestInput('你好，正在测试语音合成效果。');
      return;
    }

    // Any vendor other than DashScope (case-insensitive) is treated as OpenAI compatible.
    const nextVendor: VoiceVendor = String(initialVoice.vendor || '').trim().toLowerCase() === 'dashscope'
      ? 'DashScope'
      : 'OpenAI Compatible';
    const nextModel = (initialVoice.model || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL)).trim();
    const defaultVoiceKey = nextVendor === 'DashScope'
      ? DASHSCOPE_DEFAULT_VOICE
      : buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);

    setVendor(nextVendor);
    setName(initialVoice.name || '');
    setGender(initialVoice.gender || 'Female');
    setLanguage(initialVoice.language || 'zh');
    setDescription(initialVoice.description || '');
    setOpenaiCompatibleModel(nextModel);
    setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey);
    setSfSpeed(initialVoice.speed ?? 1);
    setSfGain(initialVoice.gain ?? 0);
    setSfPitch(initialVoice.pitch ?? 0);
    setApiKey(initialVoice.apiKey || '');
    setBaseUrl(initialVoice.baseUrl || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : ''));
  }, [initialVoice, isOpen]);

  // Stop the test audio when the dialog closes or the component unmounts.
  useEffect(() => {
    if (!isOpen) return undefined;
    return () => {
      auditionTokenRef.current += 1;
      testAudioRef.current?.pause();
      testAudioRef.current = null;
    };
  }, [isOpen]);

  /** Default model / voice key / base URL for a vendor. */
  const defaultsForVendor = (target: VoiceVendor) => (
    target === 'DashScope'
      ? { model: DASHSCOPE_DEFAULT_MODEL, voiceKey: DASHSCOPE_DEFAULT_VOICE, baseUrl: DASHSCOPE_DEFAULT_BASE_URL }
      : { model: OPENAI_COMPATIBLE_DEFAULT_MODEL, voiceKey: OPENAI_COMPATIBLE_DEFAULT_VOICE, baseUrl: '' }
  );

  /**
   * Switches the vendor. Fields that are empty or still hold the previous
   * vendor's default are moved to the new vendor's default; values the user
   * customised are left untouched. Without this the form would keep, for
   * example, the OpenAI-compatible model and voice after selecting DashScope.
   */
  const handleVendorChange = (nextVendor: VoiceVendor) => {
    if (nextVendor === vendor) return;

    const previousDefaults = defaultsForVendor(vendor);
    const nextDefaults = defaultsForVendor(nextVendor);
    const swapIfUntouched = (current: string, previousDefault: string, nextDefault: string): string => {
      const trimmed = (current || '').trim();
      return trimmed === '' || trimmed === previousDefault ? nextDefault : current;
    };

    setVendor(nextVendor);
    setOpenaiCompatibleModel((current) => swapIfUntouched(current, previousDefaults.model, nextDefaults.model));
    setSfVoiceId((current) => swapIfUntouched(current, previousDefaults.voiceKey, nextDefaults.voiceKey));
    setBaseUrl((current) => swapIfUntouched(current, previousDefaults.baseUrl, nextDefaults.baseUrl));
  };

  /**
   * Plays a preview of the voice being edited using the test text, the current
   * speed and (when set) the current API key. Only available in edit mode,
   * because the preview is requested by the id of an existing voice.
   */
  const handleAudition = async () => {
    if (!testInput.trim()) return;
    if (!initialVoice?.id) {
      alert('请先创建声音，再进行试听。');
      return;
    }

    const token = auditionTokenRef.current;
    try {
      setIsAuditioning(true);
      const audioUrl = await previewVoice(initialVoice.id, testInput, sfSpeed, apiKey || undefined);
      // The dialog was closed while the preview was loading.
      if (token !== auditionTokenRef.current) return;

      if (testAudioRef.current) {
        testAudioRef.current.pause();
      }
      const audio = new Audio(audioUrl);
      testAudioRef.current = audio;
      await audio.play();
    } catch (error: any) {
      // Closing the dialog pauses the audio, which rejects a pending `play()`;
      // that is not an error worth reporting.
      if (token === auditionTokenRef.current) {
        alert(error?.message || '试听失败');
      }
    } finally {
      setIsAuditioning(false);
    }
  };

  /** Validates the form, resolves vendor-specific fallbacks and hands the voice to `onSuccess`. */
  const handleSubmit = async () => {
    const trimmedName = name.trim();
    if (!trimmedName) {
      alert('请填写声音显示名称');
      return;
    }

    const resolvedModel = (() => {
      const current = (openaiCompatibleModel || '').trim();
      if (current) return current;
      return vendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL;
    })();

    const resolvedVoiceKey = (() => {
      const current = (sfVoiceId || '').trim();
      if (current) return current;
      if (vendor === 'DashScope') return DASHSCOPE_DEFAULT_VOICE;
      return buildOpenAICompatibleVoiceKey(initialVoice?.id || trimmedName, resolvedModel);
    })();

    const resolvedBaseUrl = (() => {
      const current = (baseUrl || '').trim();
      if (current) return current;
      return vendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : '';
    })();

    const newVoice: Voice = {
      // New voices get a client-generated, timestamp-based id.
      id: initialVoice?.id || `oa-${Date.now()}`,
      name: trimmedName,
      vendor,
      gender,
      language,
      description: description || `Model: ${resolvedModel}`,
      model: resolvedModel,
      voiceKey: resolvedVoiceKey,
      apiKey,
      baseUrl: resolvedBaseUrl,
      speed: sfSpeed,
      gain: sfGain,
      pitch: sfPitch,
    };

    try {
      setIsSaving(true);
      // No manual reset afterwards: the form is re-initialised by the effect
      // above the next time the dialog opens.
      await onSuccess(newVoice);
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setIsSaving(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={initialVoice ? '编辑声音' : '添加声音'}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>取消</Button>
          <Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90" disabled={isSaving}>
            {isSaving ? '保存中...' : initialVoice ? '保存修改' : '确认添加'}
          </Button>
        </>
      }
    >
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">厂商 (Vendor)</label>
          <Select value={vendor} onChange={(e) => handleVendorChange(e.target.value as VoiceVendor)}>
            <option value="OpenAI Compatible">OpenAI Compatible</option>
            <option value="DashScope">DashScope</option>
          </Select>
        </div>

        <div className="h-px bg-white/5"></div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">声音名称</label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: 客服小美" />
        </div>

        <div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200">
            <div className="grid grid-cols-2 gap-4">
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">模型 (Model)</label>
                <Input
                  className="font-mono text-xs"
                  value={openaiCompatibleModel}
                  onChange={(e) => setOpenaiCompatibleModel(e.target.value)}
                  placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL}
                />
              </div>
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">声音 ID (Voice)</label>
                <Input
                  value={sfVoiceId}
                  onChange={(e) => setSfVoiceId(e.target.value)}
                  placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_VOICE : OPENAI_COMPATIBLE_DEFAULT_VOICE}
                />
              </div>
            </div>

            <div className="grid grid-cols-2 gap-4">
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">语速 (Speed)</label>
                <div className="flex items-center space-x-2">
                  <input type="range" min="0.5" max="2" step="0.1" value={sfSpeed} onChange={(e) => setSfSpeed(parseFloat(e.target.value))} className="flex-1 accent-primary" />
                  <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfSpeed}x</span>
                </div>
              </div>
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">增益 (Gain)</label>
                <div className="flex items-center space-x-2">
                  <input type="range" min="-10" max="10" step="1" value={sfGain} onChange={(e) => setSfGain(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
                  <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfGain}dB</span>
                </div>
              </div>
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">音调 (Pitch)</label>
                <div className="flex items-center space-x-2">
                  <input type="range" min="-12" max="12" step="1" value={sfPitch} onChange={(e) => setSfPitch(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
                  <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfPitch}</span>
                </div>
              </div>
            </div>
          </div>

        <div className="grid grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">API Key</label>
            <Input value={apiKey} type="password" onChange={(e) => setApiKey(e.target.value)} placeholder="每个声音独立 API Key" />
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Base URL</label>
            <Input
              value={baseUrl}
              onChange={(e) => setBaseUrl(e.target.value)}
              placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : 'https://.../v1'}
            />
          </div>
        </div>

        <div className="grid grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">性别</label>
            <Select
              value={gender}
              onChange={(e) => setGender(e.target.value)}
            >
              <option value="Female">女 (Female)</option>
              <option value="Male">男 (Male)</option>
            </Select>
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">语言</label>
            <Select
              value={language}
              onChange={(e) => setLanguage(e.target.value)}
            >
              <option value="zh">中文 (Chinese)</option>
              <option value="en">英文 (English)</option>
            </Select>
          </div>
        </div>

        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">备注</label>
          <textarea
            className="flex min-h-[60px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
            value={description}
            onChange={(e) => setDescription(e.target.value)}
            placeholder="记录该声音的特点..."
          />
        </div>

        <div className="p-4 rounded-xl border border-primary/20 bg-primary/5 space-y-3">
          <div className="flex items-center justify-between">
            <h4 className="text-[10px] font-black text-primary flex items-center tracking-widest uppercase">
              <Volume2 className="w-3.5 h-3.5 mr-1.5" /> 参数试听 (Preview)
            </h4>
          </div>
          <div className="flex gap-2">
            <Input
              value={testInput}
              onChange={(e) => setTestInput(e.target.value)}
              placeholder="输入测试文本..."
              className="text-xs bg-black/20"
            />
            <Button
              variant="primary"
              size="sm"
              onClick={handleAudition}
              disabled={isAuditioning || !initialVoice}
              className="shrink-0 h-9"
            >
              {isAuditioning ? <Pause className="h-3.5 w-3.5 animate-pulse" /> : <Play className="h-3.5 w-3.5" />}
            </Button>
          </div>
        </div>
      </div>
    </Dialog>
  );
};

/**
 * Dialog for creating a voice from an uploaded reference audio file.
 *
 * Props:
 * - `isOpen` / `onClose`: dialog visibility and dismissal callback.
 * - `onSuccess`: receives the new `Voice`; a rejection is reported to the user
 *   and the entered data is kept so the submission can be retried.
 *
 * NOTE(review): the selected file is only used for validation and display here;
 * it is not passed to `onSuccess`. Confirm whether the upload is expected to
 * happen elsewhere or is still to be implemented.
 */
const CloneVoiceModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSuccess: (voice: Voice) => Promise<void>;
}> = ({ isOpen, onClose, onSuccess }) => {
  const [name, setName] = useState('');
  const [description, setDescription] = useState('');
  const [file, setFile] = useState<File | null>(null);
  const [isSaving, setIsSaving] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    if (e.target.files && e.target.files[0]) {
      setFile(e.target.files[0]);
    }
  };

  /** Clears the form, including the native file input. */
  const resetForm = () => {
    setName('');
    setDescription('');
    setFile(null);
    // Without clearing the input's value, picking the same file again would
    // not fire a change event.
    if (inputRef.current) {
      inputRef.current.value = '';
    }
  };

  /** Validates the form and hands the new voice to `onSuccess`. */
  const handleSubmit = async () => {
    const trimmedName = name.trim();
    if (!trimmedName || !file) {
      alert('请填写名称并上传音频文件');
      return;
    }

    const newVoice: Voice = {
      // Client-generated, timestamp-based id.
      id: `v-${Date.now()}`,
      name: trimmedName,
      vendor: 'OpenAI Compatible',
      gender: 'Female',
      language: 'zh',
      description: description || 'User cloned voice',
    };

    try {
      setIsSaving(true);
      await onSuccess(newVoice);
      resetForm();
    } catch (error: any) {
      // Keep the form contents so the user can retry.
      alert(error?.message || '克隆失败');
    } finally {
      setIsSaving(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title="克隆声音"
      footer={
        <>
          <Button variant="ghost" onClick={onClose}>取消</Button>
          <Button onClick={handleSubmit} disabled={isSaving}>
            {isSaving ? '克隆中...' : '开始克隆'}
          </Button>
        </>
      }
    >
      <div className="space-y-4">
        <div className="space-y-2">
          <label className="text-sm font-medium text-white">语音名称</label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="给新声音起个名字" />
        </div>

        <div className="space-y-2">
          <label className="text-sm font-medium text-white">上传音频文件 (参考音频)</label>
          <div
            className="flex flex-col items-center justify-center w-full h-32 rounded-lg border-2 border-dashed border-white/10 bg-white/5 hover:bg-white/10 transition-colors cursor-pointer"
            onClick={() => inputRef.current?.click()}
          >
            <input ref={inputRef} type="file" accept="audio/*" className="hidden" onChange={handleFileChange} />
            {file ? (
              <div className="flex items-center space-x-2 text-primary">
                <Mic2 className="h-6 w-6" />
                <span className="text-sm font-medium">{file.name}</span>
              </div>
            ) : (
              <>
                <Upload className="h-8 w-8 mb-2 text-muted-foreground" />
                <p className="text-sm text-muted-foreground">点击上传 WAV/MP3 文件</p>
              </>
            )}
          </div>
        </div>

        <div className="space-y-2">
          <label className="text-sm font-medium text-white">语音描述</label>
          <textarea
            className="flex min-h-[80px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
            value={description}
            onChange={(e) => setDescription(e.target.value)}
            placeholder="描述声音特点（如：年轻、沉稳...）"
          />
        </div>
      </div>
    </Dialog>
  );
};