Voice library: add DashScope voice support

This commit is contained in:
Xin Wang
2026-02-26 03:54:52 +08:00
parent b193f91432
commit f77f7c7531
11 changed files with 684 additions and 26 deletions

View File

@@ -186,9 +186,11 @@ class TestAssistantAPI:
sample_asr_model_data["vendor"] = "OpenAI Compatible"
llm_resp = client.post("/api/llm", json=sample_llm_model_data)
assert llm_resp.status_code == 200
llm_id = llm_resp.json()["id"]
asr_resp = client.post("/api/asr", json=sample_asr_model_data)
assert asr_resp.status_code == 200
asr_id = asr_resp.json()["id"]
sample_voice_data["vendor"] = "OpenAI Compatible"
sample_voice_data["base_url"] = "https://tts.example.com/v1/audio/speech"
@@ -198,8 +200,8 @@ class TestAssistantAPI:
voice_id = voice_resp.json()["id"]
sample_assistant_data.update({
"llmModelId": sample_llm_model_data["id"],
"asrModelId": sample_asr_model_data["id"],
"llmModelId": llm_id,
"asrModelId": asr_id,
"voice": voice_id,
"prompt": "runtime prompt",
"opener": "runtime opener",
@@ -220,7 +222,8 @@ class TestAssistantAPI:
assert metadata["services"]["llm"]["model"] == sample_llm_model_data["model_name"]
assert metadata["services"]["asr"]["model"] == sample_asr_model_data["model_name"]
assert metadata["services"]["asr"]["baseUrl"] == sample_asr_model_data["base_url"]
assert metadata["services"]["tts"]["voice"] == sample_voice_data["voice_key"]
expected_tts_voice = f"{sample_voice_data['model']}:{sample_voice_data['voice_key']}"
assert metadata["services"]["tts"]["voice"] == expected_tts_voice
assert metadata["services"]["tts"]["baseUrl"] == sample_voice_data["base_url"]
def test_get_engine_config_endpoint(self, client, sample_assistant_data):
@@ -252,6 +255,38 @@ class TestAssistantAPI:
assert metadata["output"]["mode"] == "text"
assert metadata["services"]["tts"]["enabled"] is False
def test_runtime_config_dashscope_voice_provider(self, client, sample_assistant_data):
    """A DashScope-vendor voice must surface as the `dashscope` TTS provider —
    with its voice key, model, API key and websocket base URL — in the
    assistant's runtime session-start metadata."""
    # Register a DashScope voice through the public API.
    create_resp = client.post("/api/voices", json={
        "name": "DashScope Cherry",
        "vendor": "DashScope",
        "gender": "Female",
        "language": "zh",
        "description": "dashscope voice",
        "api_key": "dashscope-key",
        "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
    })
    assert create_resp.status_code == 200
    created_voice = create_resp.json()

    # Build an assistant that uses this voice with audio output enabled.
    sample_assistant_data.update({
        "voice": created_voice["id"],
        "voiceOutputEnabled": True,
    })
    create_assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert create_assistant_resp.status_code == 200
    assistant_id = create_assistant_resp.json()["id"]

    # Fetch the runtime config and verify the DashScope TTS mapping.
    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    tts_meta = runtime_resp.json()["sessionStartMetadata"]["services"]["tts"]
    assert tts_meta["provider"] == "dashscope"
    assert tts_meta["voice"] == "Cherry"
    assert tts_meta["model"] == "qwen3-tts-flash-realtime"
    assert tts_meta["apiKey"] == "dashscope-key"
    assert tts_meta["baseUrl"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data):
sample_assistant_data.update({
"firstTurnMode": "user_first",