From a42dd4c71258729f88942cdec473c0c9ffdc140d Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 9 Feb 2026 14:39:43 +0800 Subject: [PATCH] Fix tts voice bug --- web/pages/Assistants.tsx | 25 ++++++++++++++++++++++++- web/pages/VoiceLibrary.tsx | 24 ++++++++++++++++++++---- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/web/pages/Assistants.tsx b/web/pages/Assistants.tsx index 93ce445..9759930 100644 --- a/web/pages/Assistants.tsx +++ b/web/pages/Assistants.tsx @@ -19,6 +19,29 @@ const isSiliconflowVendor = (vendor?: string) => { return normalized === 'siliconflow' || normalized === '硅基流动'; }; +const SILICONFLOW_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B'; + +const buildSiliconflowVoiceKey = (voiceId: string, model?: string) => { + const id = String(voiceId || '').trim(); + if (!id) return ''; + if (id.includes(':')) return id; + return `${model || SILICONFLOW_DEFAULT_MODEL}:${id}`; +}; + +const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => { + const explicitKey = String(voice.voiceKey || '').trim(); + if (!isSiliconflowVendor(voice.vendor)) { + return explicitKey || selectedVoiceId; + } + if (voice.isSystem) { + const canonical = buildSiliconflowVoiceKey(selectedVoiceId, voice.model); + if (!explicitKey) return canonical; + const explicitSuffix = explicitKey.includes(':') ? explicitKey.split(':').pop() : explicitKey; + if (explicitSuffix && explicitSuffix !== selectedVoiceId) return canonical; + } + return explicitKey || buildSiliconflowVoiceKey(selectedVoiceId, voice.model); +}; + export const AssistantsPage: React.FC = () => { const [assistants, setAssistants] = useState([]); const [voices, setVoices] = useState([]); @@ -1404,7 +1427,7 @@ export const DebugDrawer: React.FC<{ provider: ttsProvider, model: voice.model, apiKey: ttsProvider === 'siliconflow' ? voice.apiKey : null, - voice: voice.voiceKey || voice.id, + voice: resolveRuntimeTtsVoice(assistant.voice, voice), speed: assistant.speed || voice.speed || 1.0, }; } else { diff --git a/web/pages/VoiceLibrary.tsx b/web/pages/VoiceLibrary.tsx index 93a13a1..ea8f234 100644 --- a/web/pages/VoiceLibrary.tsx +++ b/web/pages/VoiceLibrary.tsx @@ -4,6 +4,14 @@ import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Bad import { Voice } from '../types'; import { createVoice, deleteVoice, fetchVoices, previewVoice, updateVoice } from '../services/backendApi'; +const SILICONFLOW_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B'; + +const buildSiliconflowVoiceKey = (rawId: string, model: string): string => { + const id = (rawId || '').trim(); + if (!id) return `${model}:anna`; + return id.includes(':') ? id : `${model}:${id}`; +}; + export const VoiceLibraryPage: React.FC = () => { const [voices, setVoices] = useState([]); const [searchTerm, setSearchTerm] = useState(''); @@ -249,7 +257,7 @@ const AddVoiceModal: React.FC<{ const [vendor, setVendor] = useState<'硅基流动' | 'Ali' | 'Volcano' | 'Minimax'>('硅基流动'); const [name, setName] = useState(''); - const [sfModel, setSfModel] = useState('FunAudioLLM/CosyVoice2-0.5B'); + const [sfModel, setSfModel] = useState(SILICONFLOW_DEFAULT_MODEL); const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna'); const [sfSpeed, setSfSpeed] = useState(1); const [sfGain, setSfGain] = useState(0); @@ -271,6 +279,8 @@ const AddVoiceModal: React.FC<{ useEffect(() => { if (!initialVoice) return; const nextVendor = initialVoice.vendor === 'SiliconFlow' ? '硅基流动' : initialVoice.vendor; + const nextModel = initialVoice.model || SILICONFLOW_DEFAULT_MODEL; + const defaultVoiceKey = buildSiliconflowVoiceKey(initialVoice.id || initialVoice.name || '', nextModel); setVendor((nextVendor as any) || '硅基流动'); setName(initialVoice.name || ''); setGender(initialVoice.gender || 'Female'); @@ -278,8 +288,8 @@ const AddVoiceModal: React.FC<{ setDescription(initialVoice.description || ''); setModel(initialVoice.model || ''); setVoiceKey(initialVoice.voiceKey || ''); - setSfModel(initialVoice.model || 'FunAudioLLM/CosyVoice2-0.5B'); - setSfVoiceId(initialVoice.voiceKey || 'FunAudioLLM/CosyVoice2-0.5B:anna'); + setSfModel(nextModel); + setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey); setSfSpeed(initialVoice.speed ?? 1); setSfGain(initialVoice.gain ?? 0); setSfPitch(initialVoice.pitch ?? 0); @@ -315,6 +325,12 @@ const AddVoiceModal: React.FC<{ return; } + const resolvedSiliconflowVoiceKey = (() => { + const current = (sfVoiceId || '').trim(); + if (current) return current; + return buildSiliconflowVoiceKey(initialVoice?.id || name, sfModel || SILICONFLOW_DEFAULT_MODEL); + })(); + const newVoice: Voice = { id: initialVoice?.id || `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`, name, @@ -323,7 +339,7 @@ const AddVoiceModal: React.FC<{ language, description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`), model: vendor === '硅基流动' ? sfModel : model, - voiceKey: vendor === '硅基流动' ? sfVoiceId : voiceKey, + voiceKey: vendor === '硅基流动' ? resolvedSiliconflowVoiceKey : voiceKey, apiKey, baseUrl, speed: sfSpeed,