Files
AI-VideoAssistant/web/pages/VoiceLibrary.tsx
2026-02-12 18:44:55 +08:00

586 lines
24 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import React, { useEffect, useState, useRef } from 'react';
import { Search, Mic2, Play, Pause, Upload, Filter, Plus, Volume2, Pencil, Trash2 } from 'lucide-react';
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
import { Voice } from '../types';
import { createVoice, deleteVoice, fetchVoices, previewVoice, updateVoice } from '../services/backendApi';
const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
const buildOpenAICompatibleVoiceKey = (rawId: string, model: string): string => {
const id = (rawId || '').trim();
if (!id) return `${model}:anna`;
return id.includes(':') ? id : `${model}:${id}`;
};
export const VoiceLibraryPage: React.FC = () => {
const [voices, setVoices] = useState<Voice[]>([]);
const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<'OpenAI Compatible'>('OpenAI Compatible');
const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');
const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
const [isCloneModalOpen, setIsCloneModalOpen] = useState(false);
const [isAddModalOpen, setIsAddModalOpen] = useState(false);
const [editingVoice, setEditingVoice] = useState<Voice | null>(null);
const [isLoading, setIsLoading] = useState(true);
const [playLoadingId, setPlayLoadingId] = useState<string | null>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);
useEffect(() => {
const loadVoices = async () => {
setIsLoading(true);
try {
setVoices(await fetchVoices());
} catch (error) {
console.error(error);
setVoices([]);
} finally {
setIsLoading(false);
}
};
loadVoices();
}, []);
const filteredVoices = voices.filter((voice) => {
const matchesSearch = voice.name.toLowerCase().includes(searchTerm.toLowerCase());
const matchesVendor = voice.vendor === vendorFilter;
const matchesGender = genderFilter === 'all' || voice.gender === genderFilter;
const matchesLang = langFilter === 'all' || voice.language === langFilter;
return matchesSearch && matchesVendor && matchesGender && matchesLang;
});
const handlePlayToggle = async (voice: Voice) => {
if (playingVoiceId === voice.id && audioRef.current) {
audioRef.current.pause();
audioRef.current.currentTime = 0;
setPlayingVoiceId(null);
return;
}
try {
setPlayLoadingId(voice.id);
const audioUrl = await previewVoice(
voice.id,
voice.language === 'en' ? 'Hello, this is a voice preview.' : '你好,这是一段语音试听。',
voice.speed
);
if (audioRef.current) {
audioRef.current.pause();
}
const audio = new Audio(audioUrl);
audio.onended = () => setPlayingVoiceId(null);
audio.onerror = () => {
setPlayingVoiceId(null);
alert('试听失败,请检查该声音的 API Key / Base URL。');
};
audioRef.current = audio;
setPlayingVoiceId(voice.id);
await audio.play();
} catch (error: any) {
alert(error?.message || '试听失败');
setPlayingVoiceId(null);
} finally {
setPlayLoadingId(null);
}
};
const handleAddSuccess = async (newVoice: Voice) => {
const created = await createVoice(newVoice);
setVoices((prev) => [created, ...prev]);
setIsAddModalOpen(false);
setIsCloneModalOpen(false);
};
const handleUpdateSuccess = async (id: string, data: Voice) => {
const updated = await updateVoice(id, data);
setVoices((prev) => prev.map((voice) => (voice.id === id ? updated : voice)));
setEditingVoice(null);
};
const handleDelete = async (id: string) => {
if (!confirm('确认删除这个声音吗?')) return;
await deleteVoice(id);
setVoices((prev) => prev.filter((voice) => voice.id !== id));
};
return (
<div className="space-y-6 animate-in fade-in py-4 pb-10">
<div className="flex items-center justify-between">
<h1 className="text-2xl font-bold tracking-tight text-white"></h1>
<div className="flex space-x-3">
<Button variant="primary" onClick={() => setIsAddModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
<Plus className="mr-2 h-4 w-4" />
</Button>
<Button variant="primary" onClick={() => setIsCloneModalOpen(true)} className="shadow-[0_0_15px_rgba(6,182,212,0.4)]">
<Mic2 className="mr-2 h-4 w-4" />
</Button>
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-4 gap-4 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
<div className="relative">
<Search className="absolute left-2.5 top-2.5 h-4 w-4 text-muted-foreground" />
<Input
placeholder="搜索声音名称..."
className="pl-9 border-0 bg-white/5"
value={searchTerm}
onChange={(e) => setSearchTerm(e.target.value)}
/>
</div>
<div className="flex items-center space-x-2">
<Filter className="h-4 w-4 text-muted-foreground" />
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
value={vendorFilter}
onChange={(e) => setVendorFilter(e.target.value as any)}
>
<option value="OpenAI Compatible">OpenAI Compatible</option>
</select>
</div>
<div className="flex items-center space-x-2">
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
value={genderFilter}
onChange={(e) => setGenderFilter(e.target.value as any)}
>
<option value="all"></option>
<option value="Male"> (Male)</option>
<option value="Female"> (Female)</option>
</select>
</div>
<div className="flex items-center space-x-2">
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
value={langFilter}
onChange={(e) => setLangFilter(e.target.value as any)}
>
<option value="all"></option>
<option value="zh"> (Chinese)</option>
<option value="en"> (English)</option>
</select>
</div>
</div>
<div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
<table className="w-full text-sm">
<TableHeader>
<TableRow>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead className="text-right"></TableHead>
<TableHead className="text-right"></TableHead>
</TableRow>
</TableHeader>
<tbody>
{!isLoading && filteredVoices.map((voice) => (
<TableRow key={voice.id}>
<TableCell className="font-medium">
<div className="flex flex-col">
<span className="flex items-center text-white">{voice.name}</span>
{voice.description && <span className="text-xs text-muted-foreground">{voice.description}</span>}
</div>
</TableCell>
<TableCell>
<Badge variant="outline">{voice.vendor}</Badge>
</TableCell>
<TableCell className="text-muted-foreground">{voice.gender === 'Male' ? '男' : '女'}</TableCell>
<TableCell className="text-muted-foreground">{voice.language === 'zh' ? '中文' : 'English'}</TableCell>
<TableCell className="text-right">
<Button
variant="ghost"
size="icon"
onClick={() => handlePlayToggle(voice)}
disabled={playLoadingId === voice.id}
className={playingVoiceId === voice.id ? 'text-primary animate-pulse' : ''}
>
{playingVoiceId === voice.id ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
</Button>
</TableCell>
<TableCell className="text-right">
<Button variant="ghost" size="icon" onClick={() => setEditingVoice(voice)}>
<Pencil className="h-4 w-4" />
</Button>
<Button variant="ghost" size="icon" onClick={() => handleDelete(voice.id)} className="text-red-400">
<Trash2 className="h-4 w-4" />
</Button>
</TableCell>
</TableRow>
))}
{!isLoading && filteredVoices.length === 0 && (
<TableRow>
<TableCell colSpan={6} className="text-center py-6 text-muted-foreground"></TableCell>
</TableRow>
)}
{isLoading && (
<TableRow>
<TableCell colSpan={6} className="text-center py-6 text-muted-foreground">...</TableCell>
</TableRow>
)}
</tbody>
</table>
</div>
<AddVoiceModal isOpen={isAddModalOpen} onClose={() => setIsAddModalOpen(false)} onSuccess={handleAddSuccess} />
<AddVoiceModal
isOpen={!!editingVoice}
onClose={() => setEditingVoice(null)}
onSuccess={(voice) => handleUpdateSuccess(editingVoice!.id, voice)}
initialVoice={editingVoice || undefined}
/>
<CloneVoiceModal isOpen={isCloneModalOpen} onClose={() => setIsCloneModalOpen(false)} onSuccess={handleAddSuccess} />
</div>
);
};
const AddVoiceModal: React.FC<{
isOpen: boolean;
onClose: () => void;
onSuccess: (voice: Voice) => Promise<void>;
initialVoice?: Voice;
}> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
const [vendor, setVendor] = useState<'OpenAI Compatible'>('OpenAI Compatible');
const [name, setName] = useState('');
const [openaiCompatibleModel, setOpenaiCompatibleModel] = useState(OPENAI_COMPATIBLE_DEFAULT_MODEL);
const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna');
const [sfSpeed, setSfSpeed] = useState(1);
const [sfGain, setSfGain] = useState(0);
const [sfPitch, setSfPitch] = useState(0);
const [gender, setGender] = useState('Female');
const [language, setLanguage] = useState('zh');
const [description, setDescription] = useState('');
const [apiKey, setApiKey] = useState('');
const [baseUrl, setBaseUrl] = useState('');
const [testInput, setTestInput] = useState('你好,正在测试语音合成效果。');
const [isAuditioning, setIsAuditioning] = useState(false);
const [isSaving, setIsSaving] = useState(false);
const testAudioRef = useRef<HTMLAudioElement | null>(null);
useEffect(() => {
if (!initialVoice) return;
const nextVendor = 'OpenAI Compatible';
const nextModel = initialVoice.model || OPENAI_COMPATIBLE_DEFAULT_MODEL;
const defaultVoiceKey = buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);
setVendor(nextVendor);
setName(initialVoice.name || '');
setGender(initialVoice.gender || 'Female');
setLanguage(initialVoice.language || 'zh');
setDescription(initialVoice.description || '');
setOpenaiCompatibleModel(nextModel);
setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey);
setSfSpeed(initialVoice.speed ?? 1);
setSfGain(initialVoice.gain ?? 0);
setSfPitch(initialVoice.pitch ?? 0);
setApiKey(initialVoice.apiKey || '');
setBaseUrl(initialVoice.baseUrl || '');
}, [initialVoice, isOpen]);
const handleAudition = async () => {
if (!testInput.trim()) return;
if (!initialVoice?.id) {
alert('请先创建声音,再进行试听。');
return;
}
try {
setIsAuditioning(true);
const audioUrl = await previewVoice(initialVoice.id, testInput, sfSpeed, apiKey || undefined);
if (testAudioRef.current) {
testAudioRef.current.pause();
}
const audio = new Audio(audioUrl);
testAudioRef.current = audio;
await audio.play();
} catch (error: any) {
alert(error?.message || '试听失败');
} finally {
setIsAuditioning(false);
}
};
const handleSubmit = async () => {
if (!name) {
alert('请填写声音显示名称');
return;
}
const resolvedVoiceKey = (() => {
const current = (sfVoiceId || '').trim();
if (current) return current;
return buildOpenAICompatibleVoiceKey(initialVoice?.id || name, openaiCompatibleModel || OPENAI_COMPATIBLE_DEFAULT_MODEL);
})();
const newVoice: Voice = {
id: initialVoice?.id || `oa-${Date.now()}`,
name,
vendor,
gender,
language,
description: description || `Model: ${openaiCompatibleModel}`,
model: openaiCompatibleModel,
voiceKey: resolvedVoiceKey,
apiKey,
baseUrl,
speed: sfSpeed,
gain: sfGain,
pitch: sfPitch,
};
try {
setIsSaving(true);
await onSuccess(newVoice);
setName('');
setVendor('OpenAI Compatible');
setDescription('');
setApiKey('');
setBaseUrl('');
} catch (error: any) {
alert(error?.message || '保存失败');
} finally {
setIsSaving(false);
}
};
return (
<Dialog
isOpen={isOpen}
onClose={onClose}
title={initialVoice ? '编辑声音' : '添加声音'}
footer={
<>
<Button variant="ghost" onClick={onClose}></Button>
<Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90" disabled={isSaving}>
{isSaving ? '保存中...' : initialVoice ? '保存修改' : '确认添加'}
</Button>
</>
}
>
<div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Vendor)</label>
<Input value={vendor} readOnly className="h-10 border border-white/10 bg-white/5" />
</div>
<div className="h-px bg-white/5"></div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: 客服小美" />
</div>
<div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200">
<div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Model)</label>
<Input
className="font-mono text-xs"
value={openaiCompatibleModel}
onChange={(e) => setOpenaiCompatibleModel(e.target.value)}
placeholder="例如: FunAudioLLM/CosyVoice2-0.5B"
/>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> ID (Voice)</label>
<Input value={sfVoiceId} onChange={(e) => setSfVoiceId(e.target.value)} placeholder="FunAudioLLM/CosyVoice2-0.5B:anna" />
</div>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Speed)</label>
<div className="flex items-center space-x-2">
<input type="range" min="0.5" max="2" step="0.1" value={sfSpeed} onChange={(e) => setSfSpeed(parseFloat(e.target.value))} className="flex-1 accent-primary" />
<span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfSpeed}x</span>
</div>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Gain)</label>
<div className="flex items-center space-x-2">
<input type="range" min="-10" max="10" step="1" value={sfGain} onChange={(e) => setSfGain(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
<span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfGain}dB</span>
</div>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Pitch)</label>
<div className="flex items-center space-x-2">
<input type="range" min="-12" max="12" step="1" value={sfPitch} onChange={(e) => setSfPitch(parseInt(e.target.value, 10))} className="flex-1 accent-primary" />
<span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfPitch}</span>
</div>
</div>
</div>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">API Key</label>
<Input value={apiKey} type="password" onChange={(e) => setApiKey(e.target.value)} placeholder="每个声音独立 API Key" />
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Base URL</label>
<Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://.../v1" />
</div>
</div>
<div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
value={gender}
onChange={(e) => setGender(e.target.value)}
>
<option value="Female"> (Female)</option>
<option value="Male"> (Male)</option>
</select>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground [&>option]:bg-card"
value={language}
onChange={(e) => setLanguage(e.target.value)}
>
<option value="zh"> (Chinese)</option>
<option value="en"> (English)</option>
</select>
</div>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<textarea
className="flex min-h-[60px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
value={description}
onChange={(e) => setDescription(e.target.value)}
placeholder="记录该声音的特点..."
/>
</div>
<div className="p-4 rounded-xl border border-primary/20 bg-primary/5 space-y-3">
<div className="flex items-center justify-between">
<h4 className="text-[10px] font-black text-primary flex items-center tracking-widest uppercase">
<Volume2 className="w-3.5 h-3.5 mr-1.5" /> (Preview)
</h4>
</div>
<div className="flex gap-2">
<Input
value={testInput}
onChange={(e) => setTestInput(e.target.value)}
placeholder="输入测试文本..."
className="text-xs bg-black/20"
/>
<Button
variant="primary"
size="sm"
onClick={handleAudition}
disabled={isAuditioning || !initialVoice}
className="shrink-0 h-9"
>
{isAuditioning ? <Pause className="h-3.5 w-3.5 animate-pulse" /> : <Play className="h-3.5 w-3.5" />}
</Button>
</div>
</div>
</div>
</Dialog>
);
};
const CloneVoiceModal: React.FC<{
isOpen: boolean;
onClose: () => void;
onSuccess: (voice: Voice) => Promise<void>;
}> = ({ isOpen, onClose, onSuccess }) => {
const [name, setName] = useState('');
const [description, setDescription] = useState('');
const [file, setFile] = useState<File | null>(null);
const inputRef = useRef<HTMLInputElement>(null);
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
if (e.target.files && e.target.files[0]) {
setFile(e.target.files[0]);
}
};
const handleSubmit = async () => {
if (!name || !file) {
alert('请填写名称并上传音频文件');
return;
}
const newVoice: Voice = {
id: `v-${Date.now()}`,
name,
vendor: 'OpenAI Compatible',
gender: 'Female',
language: 'zh',
description: description || 'User cloned voice',
};
await onSuccess(newVoice);
setName('');
setDescription('');
setFile(null);
};
return (
<Dialog
isOpen={isOpen}
onClose={onClose}
title="克隆声音"
footer={
<>
<Button variant="ghost" onClick={onClose}></Button>
<Button onClick={handleSubmit}></Button>
</>
}
>
<div className="space-y-4">
<div className="space-y-2">
<label className="text-sm font-medium text-white"></label>
<Input value={name} onChange={(e) => setName(e.target.value)} placeholder="给新声音起个名字" />
</div>
<div className="space-y-2">
<label className="text-sm font-medium text-white"> ()</label>
<div
className="flex flex-col items-center justify-center w-full h-32 rounded-lg border-2 border-dashed border-white/10 bg-white/5 hover:bg-white/10 transition-colors cursor-pointer"
onClick={() => inputRef.current?.click()}
>
<input ref={inputRef} type="file" accept="audio/*" className="hidden" onChange={handleFileChange} />
{file ? (
<div className="flex items-center space-x-2 text-primary">
<Mic2 className="h-6 w-6" />
<span className="text-sm font-medium">{file.name}</span>
</div>
) : (
<>
<Upload className="h-8 w-8 mb-2 text-muted-foreground" />
<p className="text-sm text-muted-foreground"> WAV/MP3 </p>
</>
)}
</div>
</div>
<div className="space-y-2">
<label className="text-sm font-medium text-white"></label>
<textarea
className="flex min-h-[80px] w-full rounded-md border-0 bg-white/5 px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-white"
value={description}
onChange={(e) => setDescription(e.target.value)}
placeholder="描述声音特点(如:年轻、沉稳..."
/>
</div>
</div>
</Dialog>
);
};