Add DashScope support to the voice library
This commit is contained in:
@@ -186,9 +186,11 @@ class TestAssistantAPI:
|
||||
sample_asr_model_data["vendor"] = "OpenAI Compatible"
|
||||
llm_resp = client.post("/api/llm", json=sample_llm_model_data)
|
||||
assert llm_resp.status_code == 200
|
||||
llm_id = llm_resp.json()["id"]
|
||||
|
||||
asr_resp = client.post("/api/asr", json=sample_asr_model_data)
|
||||
assert asr_resp.status_code == 200
|
||||
asr_id = asr_resp.json()["id"]
|
||||
|
||||
sample_voice_data["vendor"] = "OpenAI Compatible"
|
||||
sample_voice_data["base_url"] = "https://tts.example.com/v1/audio/speech"
|
||||
@@ -198,8 +200,8 @@ class TestAssistantAPI:
|
||||
voice_id = voice_resp.json()["id"]
|
||||
|
||||
sample_assistant_data.update({
|
||||
"llmModelId": sample_llm_model_data["id"],
|
||||
"asrModelId": sample_asr_model_data["id"],
|
||||
"llmModelId": llm_id,
|
||||
"asrModelId": asr_id,
|
||||
"voice": voice_id,
|
||||
"prompt": "runtime prompt",
|
||||
"opener": "runtime opener",
|
||||
@@ -220,7 +222,8 @@ class TestAssistantAPI:
|
||||
assert metadata["services"]["llm"]["model"] == sample_llm_model_data["model_name"]
|
||||
assert metadata["services"]["asr"]["model"] == sample_asr_model_data["model_name"]
|
||||
assert metadata["services"]["asr"]["baseUrl"] == sample_asr_model_data["base_url"]
|
||||
assert metadata["services"]["tts"]["voice"] == sample_voice_data["voice_key"]
|
||||
expected_tts_voice = f"{sample_voice_data['model']}:{sample_voice_data['voice_key']}"
|
||||
assert metadata["services"]["tts"]["voice"] == expected_tts_voice
|
||||
assert metadata["services"]["tts"]["baseUrl"] == sample_voice_data["base_url"]
|
||||
|
||||
def test_get_engine_config_endpoint(self, client, sample_assistant_data):
|
||||
@@ -252,6 +255,38 @@ class TestAssistantAPI:
|
||||
assert metadata["output"]["mode"] == "text"
|
||||
assert metadata["services"]["tts"]["enabled"] is False
|
||||
|
||||
def test_runtime_config_dashscope_voice_provider(self, client, sample_assistant_data):
    """Check that a DashScope voice yields dashscope TTS settings at runtime.

    A DashScope voice is registered without an explicit model or voice key,
    attached to an assistant with voice output enabled, and the ``tts``
    section of ``sessionStartMetadata`` from the runtime-config endpoint is
    then verified field by field.
    """
    dashscope_key = "dashscope-key"
    realtime_endpoint = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    voice_request = {
        "name": "DashScope Cherry",
        "vendor": "DashScope",
        "gender": "Female",
        "language": "zh",
        "description": "dashscope voice",
        "api_key": dashscope_key,
        "base_url": realtime_endpoint,
    }

    created_voice = client.post("/api/voices", json=voice_request)
    assert created_voice.status_code == 200
    created_voice_id = created_voice.json()["id"]

    # Point the shared assistant fixture at the freshly created voice.
    sample_assistant_data.update({
        "voice": created_voice_id,
        "voiceOutputEnabled": True,
    })
    created_assistant = client.post("/api/assistants", json=sample_assistant_data)
    assert created_assistant.status_code == 200
    new_assistant_id = created_assistant.json()["id"]

    runtime = client.get(f"/api/assistants/{new_assistant_id}/runtime-config")
    assert runtime.status_code == 200

    tts_settings = runtime.json()["sessionStartMetadata"]["services"]["tts"]
    # "Cherry" and the realtime model were not sent in the request above;
    # presumably they are defaults applied by the API - confirm in the router.
    assert tts_settings["provider"] == "dashscope"
    assert tts_settings["voice"] == "Cherry"
    assert tts_settings["model"] == "qwen3-tts-flash-realtime"
    assert tts_settings["apiKey"] == dashscope_key
    assert tts_settings["baseUrl"] == realtime_endpoint
|
||||
|
||||
def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data):
|
||||
sample_assistant_data.update({
|
||||
"firstTurnMode": "user_first",
|
||||
|
||||
@@ -171,8 +171,9 @@ class TestVoiceAPI:
|
||||
"voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post("/api/voices/anna/preview", json={"text": "你好"})
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
|
||||
assert preview_resp.status_code == 200
|
||||
payload = preview_resp.json()
|
||||
assert payload["success"] is True
|
||||
@@ -228,8 +229,103 @@ class TestVoiceAPI:
|
||||
"base_url": "https://api.siliconflow.cn/v1"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post("/api/voices/anna2/preview", json={"text": "hello"})
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "hello"})
|
||||
assert preview_resp.status_code == 200
|
||||
assert captured_auth["value"] == "Bearer voice-key-123"
|
||||
assert captured_url["value"] == "https://api.siliconflow.cn/v1/audio/speech"
|
||||
|
||||
def test_create_voice_dashscope_defaults(self, client):
    """Create a DashScope voice with no model/voice_key and expect defaults.

    The request omits ``model`` and ``voice_key``; the response must still
    report the DashScope vendor together with the default realtime model
    and the ``Cherry`` voice key.
    """
    request_body = dict(
        name="DashScope Voice",
        vendor="DashScope",
        gender="Female",
        language="zh",
        description="dashscope",
    )

    response = client.post("/api/voices", json=request_body)
    assert response.status_code == 200

    created = response.json()
    assert created["vendor"] == "DashScope"
    assert created["model"] == "qwen3-tts-flash-realtime"
    assert created["voice_key"] == "Cherry"
|
||||
|
||||
def test_preview_voice_dashscope_success(self, client, monkeypatch):
    """Preview a DashScope voice and expect a playable WAV data URL.

    The DashScope SDK symbols on the voices router are replaced with local
    fakes, so no network traffic happens. The fake realtime client records
    what the router passes to it and emits one PCM audio delta followed by
    a completion event once the text is committed.
    """
    from app.routers import voices as voice_router

    realtime_endpoint = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    preview_text = "你好"

    # Everything the router hands to the fake SDK ends up in here.
    captured = dict(api_key="", model="", url="", session={}, text="")

    class FakeAudioFormat:
        """Stand-in for the SDK audio format enumeration."""

        PCM_24000HZ_MONO_16BIT = "pcm24k16mono"

    class FakeDashScopeModule:
        """Stand-in for the ``dashscope`` module object."""

        api_key = ""

    class FakeRealtimeClient:
        """Stand-in for ``QwenTtsRealtime`` that drives the callback directly."""

        def __init__(self, *args, **kwargs):
            for option in ("api_key", "model", "url"):
                captured[option] = kwargs.get(option, "")
            self.callback = kwargs["callback"]

        def connect(self):
            self.callback.on_open()

        def update_session(self, **kwargs):
            captured["session"] = kwargs

        def append_text(self, text):
            captured["text"] = text

        def commit(self):
            # Four 16-bit PCM mono samples: 0, 1, 2, 3 (byte pairs).
            pcm_samples = bytes((0, 0, 1, 0, 2, 0, 3, 0))
            audio_delta = {
                "type": "response.audio.delta",
                "delta": base64.b64encode(pcm_samples).decode("utf-8"),
            }
            self.callback.on_event(audio_delta)
            self.callback.on_event({"type": "response.done"})

        def finish(self):
            return None

        def close(self):
            return None

    replacements = (
        ("DASHSCOPE_SDK_AVAILABLE", True),
        ("AudioFormat", FakeAudioFormat),
        ("QwenTtsRealtime", FakeRealtimeClient),
        ("dashscope", FakeDashScopeModule()),
    )
    for attribute, replacement in replacements:
        monkeypatch.setattr(voice_router, attribute, replacement)

    voice_request = {
        "name": "DashScope Voice",
        "vendor": "DashScope",
        "gender": "Female",
        "language": "zh",
        "description": "dashscope",
        "api_key": "dashscope-key",
        "base_url": realtime_endpoint,
    }
    created = client.post("/api/voices", json=voice_request)
    assert created.status_code == 200
    created_voice_id = created.json()["id"]

    preview = client.post(
        f"/api/voices/{created_voice_id}/preview",
        json={"text": preview_text},
    )
    assert preview.status_code == 200

    preview_body = preview.json()
    assert preview_body["success"] is True
    audio_url = preview_body["audio_url"]
    assert audio_url.startswith("data:audio/wav;base64,")

    # Everything after the first comma is the base64-encoded WAV payload.
    _, encoded_wav = audio_url.split(",", 1)
    wav_bytes = base64.b64decode(encoded_wav)
    assert wav_bytes.startswith(b"RIFF")

    assert captured["model"] == "qwen3-tts-flash-realtime"
    assert captured["url"] == realtime_endpoint
    assert captured["text"] == preview_text
    assert captured["session"]["voice"] == "Cherry"
|
||||
|
||||
Reference in New Issue
Block a user