Voice library: support DashScope

2026-02-26 03:54:52 +08:00
parent b193f91432
commit f77f7c7531
11 changed files with 684 additions and 26 deletions
--- a/api/tests/test_voices.py
+++ b/api/tests/test_voices.py
@@ -171,8 +171,9 @@ class TestVoiceAPI:
            "voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
        })
        assert create_resp.status_code == 200
+        voice_id = create_resp.json()["id"]

-        preview_resp = client.post("/api/voices/anna/preview", json={"text": "你好"})
+        preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
        assert preview_resp.status_code == 200
        payload = preview_resp.json()
        assert payload["success"] is True
@@ -228,8 +229,103 @@ class TestVoiceAPI:
            "base_url": "https://api.siliconflow.cn/v1"
        })
        assert create_resp.status_code == 200
+        voice_id = create_resp.json()["id"]

-        preview_resp = client.post("/api/voices/anna2/preview", json={"text": "hello"})
+        preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "hello"})
        assert preview_resp.status_code == 200
        assert captured_auth["value"] == "Bearer voice-key-123"
        assert captured_url["value"] == "https://api.siliconflow.cn/v1/audio/speech"
+
+    def test_create_voice_dashscope_defaults(self, client):
+        """Creating a DashScope voice without model/voice_key should apply the defaults."""
+        create_resp = client.post("/api/voices", json={  # no "model"/"voice_key" sent
+            "name": "DashScope Voice",
+            "vendor": "DashScope",
+            "gender": "Female",
+            "language": "zh",
+            "description": "dashscope",
+        })
+        assert create_resp.status_code == 200
+        payload = create_resp.json()
+        assert payload["vendor"] == "DashScope"
+        assert payload["model"] == "qwen3-tts-flash-realtime"  # expected default model
+        assert payload["voice_key"] == "Cherry"  # expected default voice
+
+    def test_preview_voice_dashscope_success(self, client, monkeypatch):
+        """Previewing a DashScope voice should return a playable WAV data URL."""
+        from app.routers import voices as voice_router
+
+        captured = {  # values observed by the fake realtime client defined below
+            "api_key": "",  # recorded but not asserted below
+            "model": "",
+            "url": "",
+            "session": {},
+            "text": "",
+        }
+
+        class DummyAudioFormat:  # stand-in for the router's AudioFormat
+            PCM_24000HZ_MONO_16BIT = "pcm24k16mono"
+
+        class DummyDashScopeModule:  # stand-in for the router's dashscope module object
+            api_key = ""
+
+        class DummyRealtime:  # stand-in for QwenTtsRealtime; drives the callback synchronously
+            def __init__(self, *args, **kwargs):
+                captured["api_key"] = kwargs.get("api_key", "")
+                captured["model"] = kwargs.get("model", "")
+                captured["url"] = kwargs.get("url", "")
+                self.callback = kwargs["callback"]  # callback must be passed by keyword
+
+            def connect(self):
+                self.callback.on_open()  # report the connection as opened immediately
+
+            def update_session(self, **kwargs):
+                captured["session"] = kwargs  # kept for the voice assertion at the end
+
+            def append_text(self, text):
+                captured["text"] = text
+
+            def commit(self):
+                # Four 16-bit PCM mono samples (8 bytes of raw audio)
+                raw_pcm = b"\x00\x00\x01\x00\x02\x00\x03\x00"
+                self.callback.on_event({
+                    "type": "response.audio.delta",
+                    "delta": base64.b64encode(raw_pcm).decode("utf-8"),  # NOTE(review): needs a module-level `import base64`, not shown in this diff - confirm
+                })
+                self.callback.on_event({"type": "response.done"})  # presumably marks the end of the response - confirm against router
+
+            def finish(self):  # no-op in the fake
+                return None
+
+            def close(self):  # no-op in the fake
+                return None
+
+        monkeypatch.setattr(voice_router, "DASHSCOPE_SDK_AVAILABLE", True)  # swap the router's DashScope symbols for the doubles above
+        monkeypatch.setattr(voice_router, "AudioFormat", DummyAudioFormat)
+        monkeypatch.setattr(voice_router, "QwenTtsRealtime", DummyRealtime)
+        monkeypatch.setattr(voice_router, "dashscope", DummyDashScopeModule())
+
+        create_resp = client.post("/api/voices", json={  # no "model"/"voice_key" sent
+            "name": "DashScope Voice",
+            "vendor": "DashScope",
+            "gender": "Female",
+            "language": "zh",
+            "description": "dashscope",
+            "api_key": "dashscope-key",
+            "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
+        })
+        assert create_resp.status_code == 200
+        voice_id = create_resp.json()["id"]
+
+        preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
+        assert preview_resp.status_code == 200
+        payload = preview_resp.json()
+        assert payload["success"] is True
+        assert payload["audio_url"].startswith("data:audio/wav;base64,")
+        encoded = payload["audio_url"].split(",", 1)[1]  # part after the data-URL prefix
+        wav_bytes = base64.b64decode(encoded)
+        assert wav_bytes.startswith(b"RIFF")  # decoded payload begins with the RIFF magic
+        assert captured["model"] == "qwen3-tts-flash-realtime"  # request set no model
+        assert captured["url"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"  # the voice's base_url
+        assert captured["text"] == "你好"
+        assert captured["session"]["voice"] == "Cherry"  # request set no voice_key