Voice library: add DashScope support

2026-02-26 03:54:52 +08:00
parent b193f91432
commit f77f7c7531
11 changed files with 684 additions and 26 deletions
--- a/web/pages/VoiceLibrary.tsx
+++ b/web/pages/VoiceLibrary.tsx
@@ -5,6 +5,12 @@ import { Voice } from '../types';
 import { createVoice, deleteVoice, fetchVoices, previewVoice, updateVoice } from '../services/backendApi';

 const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
+const OPENAI_COMPATIBLE_DEFAULT_VOICE = 'FunAudioLLM/CosyVoice2-0.5B:anna';
+const DASHSCOPE_DEFAULT_MODEL = 'qwen3-tts-flash-realtime';
+const DASHSCOPE_DEFAULT_VOICE = 'Cherry';
+const DASHSCOPE_DEFAULT_BASE_URL = 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime';
+
+type VoiceVendor = 'OpenAI Compatible' | 'DashScope';

 const buildOpenAICompatibleVoiceKey = (rawId: string, model: string): string => {
  const id = (rawId || '').trim();
@@ -249,11 +255,11 @@ const AddVoiceModal: React.FC<{
  onSuccess: (voice: Voice) => Promise<void>;
  initialVoice?: Voice;
 }> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
-  const [vendor, setVendor] = useState<'OpenAI Compatible'>('OpenAI Compatible');
+  const [vendor, setVendor] = useState<VoiceVendor>('OpenAI Compatible');
  const [name, setName] = useState('');

  const [openaiCompatibleModel, setOpenaiCompatibleModel] = useState(OPENAI_COMPATIBLE_DEFAULT_MODEL);
-  const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna');
+  const [sfVoiceId, setSfVoiceId] = useState(OPENAI_COMPATIBLE_DEFAULT_VOICE);
  const [sfSpeed, setSfSpeed] = useState(1);
  const [sfGain, setSfGain] = useState(0);
  const [sfPitch, setSfPitch] = useState(0);
@@ -270,10 +276,33 @@ const AddVoiceModal: React.FC<{
  const testAudioRef = useRef<HTMLAudioElement | null>(null);

  useEffect(() => {
-    if (!initialVoice) return;
-    const nextVendor = 'OpenAI Compatible';
-    const nextModel = initialVoice.model || OPENAI_COMPATIBLE_DEFAULT_MODEL;
-    const defaultVoiceKey = buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);
+    if (!isOpen) return;
+
+    if (!initialVoice) {
+      setVendor('OpenAI Compatible');
+      setName('');
+      setGender('Female');
+      setLanguage('zh');
+      setDescription('');
+      setOpenaiCompatibleModel(OPENAI_COMPATIBLE_DEFAULT_MODEL);
+      setSfVoiceId(OPENAI_COMPATIBLE_DEFAULT_VOICE);
+      setSfSpeed(1);
+      setSfGain(0);
+      setSfPitch(0);
+      setApiKey('');
+      setBaseUrl('');
+      setTestInput('你好，正在测试语音合成效果。');
+      return;
+    }
+
+    const nextVendor: VoiceVendor = String(initialVoice.vendor || '').trim().toLowerCase() === 'dashscope'
+      ? 'DashScope'
+      : 'OpenAI Compatible';
+    const nextModel = (initialVoice.model || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL)).trim();
+    const defaultVoiceKey = nextVendor === 'DashScope'
+      ? DASHSCOPE_DEFAULT_VOICE
+      : buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);
+
    setVendor(nextVendor);
    setName(initialVoice.name || '');
    setGender(initialVoice.gender || 'Female');
@@ -285,7 +314,7 @@ const AddVoiceModal: React.FC<{
    setSfGain(initialVoice.gain ?? 0);
    setSfPitch(initialVoice.pitch ?? 0);
    setApiKey(initialVoice.apiKey || '');
-    setBaseUrl(initialVoice.baseUrl || '');
+    setBaseUrl(initialVoice.baseUrl || (nextVendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : ''));
  }, [initialVoice, isOpen]);

  const handleAudition = async () => {
@@ -316,10 +345,23 @@ const AddVoiceModal: React.FC<{
      return;
    }

+    const resolvedModel = (() => {
+      const current = (openaiCompatibleModel || '').trim();
+      if (current) return current;
+      return vendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL;
+    })();
+
    const resolvedVoiceKey = (() => {
      const current = (sfVoiceId || '').trim();
      if (current) return current;
-      return buildOpenAICompatibleVoiceKey(initialVoice?.id || name, openaiCompatibleModel || OPENAI_COMPATIBLE_DEFAULT_MODEL);
+      if (vendor === 'DashScope') return DASHSCOPE_DEFAULT_VOICE;
+      return buildOpenAICompatibleVoiceKey(initialVoice?.id || name, resolvedModel);
+    })();
+
+    const resolvedBaseUrl = (() => {
+      const current = (baseUrl || '').trim();
+      if (current) return current;
+      return vendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : '';
    })();

    const newVoice: Voice = {
@@ -328,11 +370,11 @@ const AddVoiceModal: React.FC<{
      vendor,
      gender,
      language,
-      description: description || `Model: ${openaiCompatibleModel}`,
-      model: openaiCompatibleModel,
+      description: description || `Model: ${resolvedModel}`,
+      model: resolvedModel,
      voiceKey: resolvedVoiceKey,
      apiKey,
-      baseUrl,
+      baseUrl: resolvedBaseUrl,
      speed: sfSpeed,
      gain: sfGain,
      pitch: sfPitch,
@@ -346,6 +388,11 @@ const AddVoiceModal: React.FC<{
      setDescription('');
      setApiKey('');
      setBaseUrl('');
+      setOpenaiCompatibleModel(OPENAI_COMPATIBLE_DEFAULT_MODEL);
+      setSfVoiceId(OPENAI_COMPATIBLE_DEFAULT_VOICE);
+      setSfSpeed(1);
+      setSfGain(0);
+      setSfPitch(0);
    } catch (error: any) {
      alert(error?.message || '保存失败');
    } finally {
@@ -370,7 +417,10 @@ const AddVoiceModal: React.FC<{
      <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
        <div className="space-y-1.5">
          <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">厂商 (Vendor)</label>
-          <Input value={vendor} readOnly className="h-10 border border-white/10 bg-white/5" />
+          <Select value={vendor} onChange={(e) => setVendor(e.target.value as VoiceVendor)}>
+            <option value="OpenAI Compatible">OpenAI Compatible</option>
+            <option value="DashScope">DashScope</option>
+          </Select>
        </div>

        <div className="h-px bg-white/5"></div>
@@ -388,12 +438,16 @@ const AddVoiceModal: React.FC<{
                  className="font-mono text-xs"
                  value={openaiCompatibleModel}
                  onChange={(e) => setOpenaiCompatibleModel(e.target.value)}
-                  placeholder="例如: FunAudioLLM/CosyVoice2-0.5B"
+                  placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_MODEL : OPENAI_COMPATIBLE_DEFAULT_MODEL}
                />
              </div>
              <div className="space-y-1.5">
                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">声音 ID (Voice)</label>
-                <Input value={sfVoiceId} onChange={(e) => setSfVoiceId(e.target.value)} placeholder="FunAudioLLM/CosyVoice2-0.5B:anna" />
+                <Input
+                  value={sfVoiceId}
+                  onChange={(e) => setSfVoiceId(e.target.value)}
+                  placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_VOICE : OPENAI_COMPATIBLE_DEFAULT_VOICE}
+                />
              </div>
            </div>

@@ -429,7 +483,11 @@ const AddVoiceModal: React.FC<{
          </div>
          <div className="space-y-1.5">
            <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">Base URL</label>
-            <Input value={baseUrl} onChange={(e) => setBaseUrl(e.target.value)} placeholder="https://.../v1" />
+            <Input
+              value={baseUrl}
+              onChange={(e) => setBaseUrl(e.target.value)}
+              placeholder={vendor === 'DashScope' ? DASHSCOPE_DEFAULT_BASE_URL : 'https://.../v1'}
+            />
          </div>
        </div>