Files
AI-VideoAssistant/web/pages/VoiceLibrary.tsx
Xin Wang eecde9f0fb Integrate React Query for data management and enhance Debug Preferences
- Added React Query for managing API calls related to assistants and voices.
- Introduced `useAssistantsQuery` and `useVoicesQuery` hooks for fetching data.
- Implemented mutations for creating, updating, and deleting voices using React Query.
- Integrated a global `QueryClient` for managing query states and configurations.
- Refactored components to utilize the new query hooks, improving data handling and performance.
- Added a Zustand store for managing debug preferences, including WebSocket URL and audio settings.
2026-03-02 22:50:57 +08:00

638 lines
25 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import React, { useEffect, useMemo, useState, useRef } from 'react';
import { Search, Mic2, Play, Pause, Upload, Filter, Plus, Volume2, Pencil, Trash2 } from 'lucide-react';
import { Button, Input, Select, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge, LibraryPageShell, TableStatusRow, LibraryActionCell } from '../components/UI';
import { Voice } from '../types';
import { previewVoice } from '../services/backendApi';
import {
useCreateVoiceMutation,
useDeleteVoiceMutation,
useUpdateVoiceMutation,
useVoicesQuery,
} from '../services/queries';
// Fallback model identifier for the "OpenAI Compatible" vendor; applied when the model field is blank.
const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
// Fallback voice key for the "OpenAI Compatible" vendor, written as `<model>:<voice>`.
const OPENAI_COMPATIBLE_DEFAULT_VOICE = 'FunAudioLLM/CosyVoice2-0.5B:anna';
// Fallback model identifier for the "DashScope" vendor.
const DASHSCOPE_DEFAULT_MODEL = 'qwen3-tts-flash-realtime';
// Fallback voice key for the "DashScope" vendor.
const DASHSCOPE_DEFAULT_VOICE = 'Cherry';
// Fallback base URL (a `wss://` endpoint) for the "DashScope" vendor; applied when the base URL field is blank.
const DASHSCOPE_DEFAULT_BASE_URL = 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime';
// Vendors selectable in the add/edit voice form.
type VoiceVendor = 'OpenAI Compatible' | 'DashScope';
/**
 * Builds the voice key used for an "OpenAI Compatible" voice.
 *
 * @param rawId - Candidate identifier; surrounding whitespace is ignored.
 * @param model - Model name used as the prefix when `rawId` carries none.
 * @returns `rawId` (trimmed) when it already contains a `:`; `<model>:<rawId>`
 *          when it does not; `<model>:anna` when `rawId` is blank.
 */
const buildOpenAICompatibleVoiceKey = (rawId: string, model: string): string => {
  const trimmedId = rawId ? rawId.trim() : '';

  // Blank identifier: fall back to the "anna" voice of the given model.
  if (trimmedId.length === 0) {
    return `${model}:anna`;
  }

  // An identifier that already has a `:` is treated as fully qualified.
  if (trimmedId.includes(':')) {
    return trimmedId;
  }

  return `${model}:${trimmedId}`;
};
/**
 * Voice library page.
 *
 * Lists the voices returned by `useVoicesQuery`, lets the user filter them by
 * name, vendor, gender and language, play a synthesized preview of a voice,
 * and open the add / edit / clone dialogs. Create, update and delete go
 * through the corresponding mutation hooks.
 *
 * NOTE(review): several JSX text nodes below (button labels, column headers,
 * option labels) are empty in this copy of the file — presumably non-ASCII
 * text lost in extraction; confirm against the repository before shipping.
 */
export const VoiceLibraryPage: React.FC = () => {
  const [searchTerm, setSearchTerm] = useState('');
  const [vendorFilter, setVendorFilter] = useState<string>('all');
  const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
  const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');
  const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
  const [isCloneModalOpen, setIsCloneModalOpen] = useState(false);
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingVoice, setEditingVoice] = useState<Voice | null>(null);
  const [playLoadingId, setPlayLoadingId] = useState<string | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Tracks whether the page is still mounted so a preview that resolves late does not start playing.
  const isMountedRef = useRef(true);

  const voicesQuery = useVoicesQuery();
  // Memoized so the fallback array keeps a stable identity and the memos below are effective.
  const voices = useMemo(() => voicesQuery.data || [], [voicesQuery.data]);
  const isLoading = voicesQuery.isLoading;
  const createVoiceMutation = useCreateVoiceMutation();
  const updateVoiceMutation = useUpdateVoiceMutation();
  const deleteVoiceMutation = useDeleteVoiceMutation();

  // Stop any preview when the page unmounts; otherwise the audio keeps playing with no way to stop it.
  useEffect(() => {
    isMountedRef.current = true;
    return () => {
      isMountedRef.current = false;
      const audio = audioRef.current;
      if (audio) {
        audio.onended = null;
        audio.onerror = null;
        audio.pause();
      }
      audioRef.current = null;
    };
  }, []);

  const vendorOptions = useMemo(
    () => Array.from(new Set(voices.map((v) => String(v.vendor || '').trim()).filter(Boolean))).sort(),
    [voices]
  );

  const filteredVoices = useMemo(() => {
    const needle = searchTerm.toLowerCase();
    return voices.filter((voice) => {
      // Tolerate a missing name instead of throwing during render.
      const matchesSearch = String(voice.name || '').toLowerCase().includes(needle);
      // Compare the trimmed vendor: the filter options are built from trimmed values.
      const matchesVendor = vendorFilter === 'all' || String(voice.vendor || '').trim() === vendorFilter;
      const matchesGender = genderFilter === 'all' || voice.gender === genderFilter;
      const matchesLang = langFilter === 'all' || voice.language === langFilter;
      return matchesSearch && matchesVendor && matchesGender && matchesLang;
    });
  }, [voices, searchTerm, vendorFilter, genderFilter, langFilter]);

  /** Pauses the current preview (if any) and detaches its handlers so it cannot touch state later. */
  const stopPlayback = () => {
    const audio = audioRef.current;
    if (!audio) return;
    audio.onended = null;
    audio.onerror = null;
    audio.pause();
    audioRef.current = null;
  };

  /** Starts a preview for `voice`, or stops it when that voice is already playing. */
  const handlePlayToggle = async (voice: Voice) => {
    if (playingVoiceId === voice.id && audioRef.current) {
      stopPlayback();
      setPlayingVoiceId(null);
      return;
    }
    try {
      setPlayLoadingId(voice.id);
      const audioUrl = await previewVoice(
        voice.id,
        voice.language === 'en' ? 'Hello, this is a voice preview.' : '你好,这是一段语音试听。',
        voice.speed
      );
      if (!isMountedRef.current) return;
      stopPlayback();
      const audio = new Audio(audioUrl);
      audio.onended = () => setPlayingVoiceId(null);
      audio.onerror = () => {
        setPlayingVoiceId(null);
        alert('试听失败,请检查该声音的 API Key / Base URL。');
      };
      audioRef.current = audio;
      setPlayingVoiceId(voice.id);
      await audio.play();
    } catch (error: any) {
      // Unmount pauses the audio, which rejects a pending play(); do not alert in that case.
      if (!isMountedRef.current) return;
      alert(error?.message || '试听失败');
      setPlayingVoiceId(null);
    } finally {
      setPlayLoadingId(null);
    }
  };

  /** Creates a voice and closes both creation dialogs; rejects when the mutation fails. */
  const handleAddSuccess = async (newVoice: Voice) => {
    await createVoiceMutation.mutateAsync(newVoice);
    setIsAddModalOpen(false);
    setIsCloneModalOpen(false);
  };

  /** Updates a voice and closes the edit dialog; rejects when the mutation fails. */
  const handleUpdateSuccess = async (id: string, data: Voice) => {
    await updateVoiceMutation.mutateAsync({ id, data });
    setEditingVoice(null);
  };

  /** Deletes a voice after confirmation and reports a failure instead of leaving an unhandled rejection. */
  const handleDelete = async (id: string) => {
    if (!confirm('确认删除该声音吗?该操作不可恢复。')) return;
    try {
      await deleteVoiceMutation.mutateAsync(id);
      if (playingVoiceId === id) {
        stopPlayback();
        setPlayingVoiceId(null);
      }
    } catch (error: any) {
      alert(error?.message || '删除失败');
    }
  };

  return (
    <LibraryPageShell
      title="声音资源"
      primaryAction={(
        <div className="flex space-x-3">
          <Button variant="primary" onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
            <Plus className="mr-2 h-4 w-4" />
          </Button>
          <Button variant="primary" onClick={() => setIsCloneModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
            <Mic2 className="mr-2 h-4 w-4" />
          </Button>
        </div>
      )}
      filterBar={(
        <>
          <div className="relative">
            <Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
            <Input
              placeholder="搜索名称..."
              className="pl-9 border-0 bg-white/5"
              value={searchTerm}
              onChange={(e) => setSearchTerm(e.target.value)}
            />
          </div>
          <div className="flex items-center space-x-2">
            <Filter className="h-4 w-4 text-muted-foreground" />
            <Select
              value={vendorFilter}
              onChange={(e) => setVendorFilter(e.target.value)}
            >
              <option value="all"></option>
              {vendorOptions.map((vendor) => (
                <option key={vendor} value={vendor}>{vendor}</option>
              ))}
            </Select>
          </div>
          <div className="flex items-center space-x-2">
            <Select
              value={genderFilter}
              onChange={(e) => setGenderFilter(e.target.value as typeof genderFilter)}
            >
              <option value="all"></option>
              <option value="Male"> (Male)</option>
              <option value="Female"> (Female)</option>
            </Select>
          </div>
          <div className="flex items-center space-x-2">
            <Select
              value={langFilter}
              onChange={(e) => setLangFilter(e.target.value as typeof langFilter)}
            >
              <option value="all"></option>
              <option value="zh"> (Chinese)</option>
              <option value="en"> (English)</option>
            </Select>
          </div>
        </>
      )}
    >
      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
        <table className="w-full text-sm">
          <TableHeader>
            <TableRow>
              <TableHead></TableHead>
              <TableHead></TableHead>
              <TableHead></TableHead>
              <TableHead></TableHead>
              <TableHead className="text-right"></TableHead>
              <TableHead className="text-right"></TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
            {!isLoading && filteredVoices.map((voice) => (
              <TableRow key={voice.id}>
                <TableCell className="font-medium">
                  <div className="flex flex-col">
                    <span className="flex items-center text-white">{voice.name}</span>
                    {voice.description && <span className="text-xs text-muted-foreground">{voice.description}</span>}
                  </div>
                </TableCell>
                <TableCell>
                  <Badge variant="outline">{voice.vendor}</Badge>
                </TableCell>
                <TableCell className="text-muted-foreground">{voice.gender === 'Male' ? '男' : '女'}</TableCell>
                <TableCell className="text-muted-foreground">{voice.language === 'zh' ? '中文' : 'English'}</TableCell>
                <TableCell className="text-right">
                  <Button
                    variant="ghost"
                    size="icon"
                    onClick={() => handlePlayToggle(voice)}
                    disabled={playLoadingId === voice.id}
                    className={playingVoiceId === voice.id ? 'text-primary animate-pulse' : ''}
                  >
                    {playingVoiceId === voice.id ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
                  </Button>
                </TableCell>
                <LibraryActionCell
                  editAction={(
                    <Button variant="ghost" size="icon" onClick={() => setEditingVoice(voice)} title="编辑声音">
                      <Pencil className="h-4 w-4" />
                    </Button>
                  )}
                  deleteAction={(
                    <Button variant="ghost" size="icon" onClick={() => handleDelete(voice.id)} className="text-muted-foreground hover:text-destructive transition-colors" title="删除声音">
                      <Trash2 className="h-4 w-4" />
                    </Button>
                  )}
                />
              </TableRow>
            ))}
            {!isLoading && filteredVoices.length === 0 && <TableStatusRow colSpan={6} text="暂无声音数据" />}
            {isLoading && <TableStatusRow colSpan={6} text="加载中..." />}
          </tbody>
        </table>
      </div>
      <AddVoiceModal isOpen={isAddModalOpen} onClose={() => setIsAddModalOpen(false)} onSuccess={handleAddSuccess} />
      <AddVoiceModal
        isOpen={!!editingVoice}
        onClose={() => setEditingVoice(null)}
        onSuccess={async (voice) => {
          // The dialog is only open while a voice is being edited; guard instead of asserting non-null.
          if (!editingVoice) return;
          await handleUpdateSuccess(editingVoice.id, voice);
        }}
        initialVoice={editingVoice || undefined}
      />
      <CloneVoiceModal isOpen={isCloneModalOpen} onClose={() => setIsCloneModalOpen(false)} onSuccess={handleAddSuccess} />
    </LibraryPageShell>
  );
};
/** Returns the default model, voice key and base URL for a vendor. */
const getVoiceVendorDefaults = (vendor: VoiceVendor): { model: string; voiceKey: string; baseUrl: string } =>
  vendor === 'DashScope'
    ? { model: DASHSCOPE_DEFAULT_MODEL, voiceKey: DASHSCOPE_DEFAULT_VOICE, baseUrl: DASHSCOPE_DEFAULT_BASE_URL }
    : { model: OPENAI_COMPATIBLE_DEFAULT_MODEL, voiceKey: OPENAI_COMPATIBLE_DEFAULT_VOICE, baseUrl: '' };

/**
 * Dialog for adding a voice or, when `initialVoice` is given, editing it.
 *
 * Props:
 * - `isOpen`: whether the dialog is shown; opening it (re)initializes the form.
 * - `onClose`: called when the user dismisses the dialog.
 * - `onSuccess`: receives the assembled `Voice`; a rejection is reported with
 *   an alert and the form is kept so the user can retry.
 * - `initialVoice`: voice to edit; also required for the preview button,
 *   which calls `previewVoice` with that voice's id.
 *
 * NOTE(review): several labels and the cancel button have empty text in this
 * copy of the file — presumably non-ASCII text lost in extraction; confirm
 * against the repository.
 */
const AddVoiceModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSuccess: (voice: Voice) => Promise<void>;
  initialVoice?: Voice;
}> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
  const [vendor, setVendor] = useState<VoiceVendor>('OpenAI Compatible');
  const [name, setName] = useState('');
  const [openaiCompatibleModel, setOpenaiCompatibleModel] = useState(OPENAI_COMPATIBLE_DEFAULT_MODEL);
  const [sfVoiceId, setSfVoiceId] = useState(OPENAI_COMPATIBLE_DEFAULT_VOICE);
  const [sfSpeed, setSfSpeed] = useState(1);
  const [sfGain, setSfGain] = useState(0);
  const [sfPitch, setSfPitch] = useState(0);
  const [gender, setGender] = useState('Female');
  const [language, setLanguage] = useState('zh');
  const [description, setDescription] = useState('');
  const [apiKey, setApiKey] = useState('');
  const [baseUrl, setBaseUrl] = useState('');
  const [testInput, setTestInput] = useState('你好,正在测试语音合成效果。');
  const [isAuditioning, setIsAuditioning] = useState(false);
  const [isSaving, setIsSaving] = useState(false);
  const testAudioRef = useRef<HTMLAudioElement | null>(null);
  // Mirrors `isOpen` for async code, so a preview that resolves after close does not start playing.
  const isOpenRef = useRef(isOpen);

  // Stop the preview audio when the dialog closes or unmounts.
  useEffect(() => {
    isOpenRef.current = isOpen;
    if (!isOpen) return undefined;
    return () => {
      isOpenRef.current = false;
      testAudioRef.current?.pause();
      testAudioRef.current = null;
    };
  }, [isOpen]);

  // (Re)initialize the form each time the dialog opens or the edited voice changes.
  useEffect(() => {
    if (!isOpen) return;
    if (!initialVoice) {
      setVendor('OpenAI Compatible');
      setName('');
      setGender('Female');
      setLanguage('zh');
      setDescription('');
      setOpenaiCompatibleModel(OPENAI_COMPATIBLE_DEFAULT_MODEL);
      setSfVoiceId(OPENAI_COMPATIBLE_DEFAULT_VOICE);
      setSfSpeed(1);
      setSfGain(0);
      setSfPitch(0);
      setApiKey('');
      setBaseUrl('');
      setTestInput('你好,正在测试语音合成效果。');
      return;
    }
    // Any vendor other than "dashscope" (case-insensitive) is edited as "OpenAI Compatible".
    const nextVendor: VoiceVendor = String(initialVoice.vendor || '').trim().toLowerCase() === 'dashscope'
      ? 'DashScope'
      : 'OpenAI Compatible';
    const nextModel = (initialVoice.model || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL)).trim();
    const defaultVoiceKey = nextVendor === 'DashScope'
      ? DASHSCOPE_DEFAULT_VOICE
      : buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);
    setVendor(nextVendor);
    setName(initialVoice.name || '');
    setGender(initialVoice.gender || 'Female');
    setLanguage(initialVoice.language || 'zh');
    setDescription(initialVoice.description || '');
    setOpenaiCompatibleModel(nextModel);
    setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey);
    setSfSpeed(initialVoice.speed ?? 1);
    setSfGain(initialVoice.gain ?? 0);
    setSfPitch(initialVoice.pitch ?? 0);
    setApiKey(initialVoice.apiKey || '');
    setBaseUrl(initialVoice.baseUrl || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : ''));
  }, [initialVoice, isOpen]);

  /**
   * Switches vendor and moves any field that is blank or still holds the
   * previous vendor's default to the new vendor's default. Without this, the
   * pre-filled "OpenAI Compatible" model / voice key would be saved for a
   * DashScope voice. Values the user customized are left untouched.
   */
  const handleVendorChange = (nextVendor: VoiceVendor) => {
    if (nextVendor === vendor) return;
    const previous = getVoiceVendorDefaults(vendor);
    const next = getVoiceVendorDefaults(nextVendor);
    const isUntouched = (value: string, previousDefault: string): boolean => {
      const trimmed = value.trim();
      return trimmed === '' || trimmed === previousDefault;
    };
    if (isUntouched(openaiCompatibleModel, previous.model)) setOpenaiCompatibleModel(next.model);
    if (isUntouched(sfVoiceId, previous.voiceKey)) setSfVoiceId(next.voiceKey);
    if (isUntouched(baseUrl, previous.baseUrl)) setBaseUrl(next.baseUrl);
    setVendor(nextVendor);
  };

  /** Synthesizes `testInput` with the saved voice and plays it; only available when editing. */
  const handleAudition = async () => {
    if (!testInput.trim()) return;
    if (!initialVoice?.id) {
      alert('请先创建声音,再进行试听。');
      return;
    }
    try {
      setIsAuditioning(true);
      const audioUrl = await previewVoice(initialVoice.id, testInput, sfSpeed, apiKey || undefined);
      if (!isOpenRef.current) return;
      testAudioRef.current?.pause();
      const audio = new Audio(audioUrl);
      testAudioRef.current = audio;
      await audio.play();
    } catch (error: any) {
      // Closing the dialog pauses the audio, which rejects a pending play(); stay silent then.
      if (!isOpenRef.current) return;
      alert(error?.message || '试听失败');
    } finally {
      setIsAuditioning(false);
    }
  };

  /** Validates the form, fills blank fields with vendor defaults and hands the voice to `onSuccess`. */
  const handleSubmit = async () => {
    // Reject whitespace-only names as well as empty ones.
    const trimmedName = name.trim();
    if (!trimmedName) {
      alert('请填写声音显示名称');
      return;
    }
    const vendorDefaults = getVoiceVendorDefaults(vendor);
    const resolvedModel = openaiCompatibleModel.trim() || vendorDefaults.model;
    const resolvedVoiceKey = sfVoiceId.trim()
      || (vendor === 'DashScope'
        ? DASHSCOPE_DEFAULT_VOICE
        : buildOpenAICompatibleVoiceKey(initialVoice?.id || trimmedName, resolvedModel));
    const resolvedBaseUrl = baseUrl.trim() || vendorDefaults.baseUrl;
    const newVoice: Voice = {
      // New voices get a client-generated id; edits keep the existing one.
      id: initialVoice?.id || `oa-${Date.now()}`,
      name: trimmedName,
      vendor,
      gender,
      language,
      description: description || `Model: ${resolvedModel}`,
      model: resolvedModel,
      voiceKey: resolvedVoiceKey,
      apiKey,
      baseUrl: resolvedBaseUrl,
      speed: sfSpeed,
      gain: sfGain,
      pitch: sfPitch,
    };
    try {
      setIsSaving(true);
      await onSuccess(newVoice);
      setName('');
      setVendor('OpenAI Compatible');
      setDescription('');
      setApiKey('');
      setBaseUrl('');
      setOpenaiCompatibleModel(OPENAI_COMPATIBLE_DEFAULT_MODEL);
      setSfVoiceId(OPENAI_COMPATIBLE_DEFAULT_VOICE);
      setSfSpeed(1);
      setSfGain(0);
      setSfPitch(0);
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setIsSaving(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title={initialVoice ? '编辑声音' : '添加声音'}
      footer={
        <>
          <Button variant="ghost" onClick={onClose}></Button>
          <Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90" disabled={isSaving}>
            {isSaving ? '保存中...' : initialVoice ? '保存修改' : '确认添加'}
          </Button>
        </>
      }
    >
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Vendor)</label>
          <Select value={vendor} onChange={(e) => handleVendorChange(e.target.value as VoiceVendor)}>
            <option value="OpenAI Compatible">OpenAI Compatible</option>
            <option value="DashScope">DashScope</option>
          </Select>
        </div>
        <div className="h-px bg-white/5"></div>
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: 客服小美" />
        </div>
        <div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200">
          <div className="grid grid-cols-2 gap-4">
            <div className="space-y-1.5">
              <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Model)</label>
              <Input
                className="font-mono text-xs"
                value={openaiCompatibleModel}
                onChange={(e) => setOpenaiCompatibleModel(e.target.value)}
                placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL}
              />
            </div>
            <div className="space-y-1.5">
              <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> ID (Voice)</label>
              <Input
                value={sfVoiceId}
                onChange={(e) => setSfVoiceId(e.target.value)}
                placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_VOICE : OPENAI_COMPATIBLE_DEFAULT_VOICE}
              />
            </div>
          </div>
          <div className="grid grid-cols-2 gap-4">
            <div className="space-y-1.5">
              <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Speed)</label>
              <div className="flex items-center space-x-2">
                <input type="range" min="0.5" max="2" step="0.1" value={sfSpeed} onChange={(e) => setSfSpeed(parseFloat(e.target.value))} className="flex-1 accent-primary" />
                <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfSpeed}x</span>
              </div>
            </div>
            <div className="space-y-1.5">
              <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Gain)</label>
              <div className="flex items-center space-x-2">
                <input type="range" min="-10" max="10" step="1" value={sfGain} onChange={(e) => setSfGain(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
                <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfGain}dB</span>
              </div>
            </div>
            <div className="space-y-1.5">
              <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Pitch)</label>
              <div className="flex items-center space-x-2">
                <input type="range" min="-12" max="12" step="1" value={sfPitch} onChange={(e) => setSfPitch(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
                <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfPitch}</span>
              </div>
            </div>
          </div>
        </div>
        <div className="grid grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">API Key</label>
            <Input value={apiKey} type="password" onChange={(e) => setApiKey(e.target.value)} placeholder="每个声音独立 API Key" />
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Base URL</label>
            <Input
              value={baseUrl}
              onChange={(e) => setBaseUrl(e.target.value)}
              placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : 'https://.../v1'}
            />
          </div>
        </div>
        <div className="grid grid-cols-2 gap-4">
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
            <Select
              value={gender}
              onChange={(e) => setGender(e.target.value)}
            >
              <option value="Female"> (Female)</option>
              <option value="Male"> (Male)</option>
            </Select>
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
            <Select
              value={language}
              onChange={(e) => setLanguage(e.target.value)}
            >
              <option value="zh"> (Chinese)</option>
              <option value="en"> (English)</option>
            </Select>
          </div>
        </div>
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
          <textarea
            className="flex min-h-[60px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
            value={description}
            onChange={(e) => setDescription(e.target.value)}
            placeholder="记录该声音的特点..."
          />
        </div>
        <div className="p-4 rounded-xl border border-primary/20 bg-primary/5 space-y-3">
          <div className="flex items-center justify-between">
            <h4 className="text-[10px] font-black text-primary flex items-center tracking-widest uppercase">
              <Volume2 className="w-3.5 h-3.5 mr-1.5" /> (Preview)
            </h4>
          </div>
          <div className="flex gap-2">
            <Input
              value={testInput}
              onChange={(e) => setTestInput(e.target.value)}
              placeholder="输入测试文本..."
              className="text-xs bg-black/20"
            />
            <Button
              variant="primary"
              size="sm"
              onClick={handleAudition}
              disabled={isAuditioning || !initialVoice}
              className="shrink-0 h-9"
            >
              {isAuditioning ? <Pause className="h-3.5 w-3.5 animate-pulse" /> : <Play className="h-3.5 w-3.5" />}
            </Button>
          </div>
        </div>
      </div>
    </Dialog>
  );
};
/**
 * Dialog for creating a "cloned" voice from a name, an audio file and an
 * optional description.
 *
 * Props:
 * - `isOpen`: whether the dialog is shown.
 * - `onClose`: called when the user dismisses the dialog.
 * - `onSuccess`: receives the new `Voice`; a rejection is reported with an
 *   alert and the form is kept so the user can retry.
 *
 * NOTE(review): the selected audio file is only checked for presence — it is
 * not part of the `Voice` passed to `onSuccess`, so nothing in this component
 * uploads it. Confirm whether the upload is still to be implemented.
 *
 * NOTE(review): the button and field labels have empty text in this copy of
 * the file — presumably non-ASCII text lost in extraction; confirm against
 * the repository.
 */
const CloneVoiceModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  onSuccess: (voice: Voice) => Promise<void>;
}> = ({ isOpen, onClose, onSuccess }) => {
  const [name, setName] = useState('');
  const [description, setDescription] = useState('');
  const [file, setFile] = useState<File | null>(null);
  // Guards against a double click creating two voices while the first request is in flight.
  const [isSubmitting, setIsSubmitting] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const selected = e.target.files?.[0];
    if (selected) {
      setFile(selected);
    }
  };

  /** Validates the form, submits the new voice and resets the form on success. */
  const handleSubmit = async () => {
    if (isSubmitting) return;
    // Reject whitespace-only names as well as empty ones.
    const trimmedName = name.trim();
    if (!trimmedName || !file) {
      alert('请填写名称并上传音频文件');
      return;
    }
    const newVoice: Voice = {
      id: `v-${Date.now()}`,
      name: trimmedName,
      vendor: 'OpenAI Compatible',
      gender: 'Female',
      language: 'zh',
      description: description || 'User cloned voice',
    };
    try {
      setIsSubmitting(true);
      await onSuccess(newVoice);
      setName('');
      setDescription('');
      setFile(null);
      // Clear the native input too; otherwise picking the same file again fires no change event.
      if (inputRef.current) {
        inputRef.current.value = '';
      }
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
      setIsSubmitting(false);
    }
  };

  return (
    <Dialog
      isOpen={isOpen}
      onClose={onClose}
      title="克隆声音"
      footer={
        <>
          <Button variant="ghost" onClick={onClose}></Button>
          <Button onClick={handleSubmit} disabled={isSubmitting}></Button>
        </>
      }
    >
      <div className="space-y-4">
        <div className="space-y-2">
          <label className="text-sm font-medium text-white"></label>
          <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="给新声音起个名字" />
        </div>
        <div className="space-y-2">
          <label className="text-sm font-medium text-white"> ()</label>
          <div
            className="flex flex-col items-center justify-center w-full h-32 rounded-lg border-2 border-dashed border-white/10 bg-white/5 hover:bg-white/10 transition-colors cursor-pointer"
            onClick={() => inputRef.current?.click()}
          >
            <input ref={inputRef} type="file" accept="audio/*" className="hidden" onChange={handleFileChange} />
            {file ? (
              <div className="flex items-center space-x-2 text-primary">
                <Mic2 className="h-6 w-6" />
                <span className="text-sm font-medium">{file.name}</span>
              </div>
            ) : (
              <>
                <Upload className="h-8 w-8 mb-2 text-muted-foreground" />
                <p className="text-sm text-muted-foreground"> WAV/MP3 </p>
              </>
            )}
          </div>
        </div>
        <div className="space-y-2">
          <label className="text-sm font-medium text-white"></label>
          <textarea
            className="flex min-h-[80px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
            value={description}
            onChange={(e) => setDescription(e.target.value)}
            placeholder="描述声音特点(如:年轻、沉稳..."
          />
        </div>
      </div>
    </Dialog>
  );
};