fix backend TTS provider compatibility

This commit is contained in:
Xin Wang
2026-06-10 12:32:55 +08:00
parent ac3f4dd806
commit 4a948ee609
4 changed files with 15 additions and 5 deletions

View File

@@ -26,6 +26,7 @@ class AssistantConfig(BaseModel):
# 模型/音色选项
model: str = "" # LLM
asr: str = "" # STT
tts_model: str = ""
voice: str = "" # TTS 音色
stt_language: str = ""
tts_speed: float = 1.0

View File

@@ -58,6 +58,7 @@ async def resolve_runtime_config(
# 模型/音色:凭证的模型ID优先
model=(llm.model_id if llm else ""),
asr=(stt.model_id if stt else ""),
tts_model=(tts.model_id if tts else ""),
voice=(tts.voice if tts else ""),
stt_language=(stt.language if stt else ""),
tts_speed=(tts.speed if tts else 1.0),

View File

@@ -84,6 +84,7 @@ async def test_openai_credential(
"model": config.model_id,
"input": "测试",
"voice": config.voice,
"response_format": "pcm",
"speed": config.speed,
},
)

View File

@@ -10,7 +10,7 @@ from models import AssistantConfig
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.openai.stt import OpenAISTTService
from pipecat.services.openai.tts import OpenAITTSService
from pipecat.services.openai.tts import VALID_VOICES, OpenAITTSService
from pipecat.transcriptions.language import Language
@@ -48,12 +48,18 @@ def create_llm(cfg: AssistantConfig):
def create_tts(cfg: AssistantConfig):
"""Build the TTS service for CosyVoice and similar providers.

Requests go through the OpenAI-compatible /v1/audio/speech endpoint.
Each setting is read from ``cfg`` first and falls back to the matching
``config.TTS_*`` value when the ``cfg`` field is empty.
"""
voice = cfg.voice or config.TTS_VOICE
# By default Pipecat only accepts the official OpenAI voices. OpenAI-compatible
# services often use custom voice ids; once the id is registered as an identity
# mapping, the OpenAI SDK still passes it through to the provider as a plain string.
VALID_VOICES.setdefault(voice, voice)
return OpenAITTSService(
api_key=cfg.tts_api_key or config.TTS_API_KEY,
base_url=cfg.tts_base_url or config.TTS_BASE_URL,
# NOTE(review): the next three keyword lines look like the pre-change arguments
# that this commit replaces with the settings=... block below; the diff markers
# are missing from this view, so confirm against the raw diff.
model=config.TTS_MODEL,
voice=cfg.voice or config.TTS_VOICE,
speed=cfg.tts_speed,
settings=OpenAITTSService.Settings(
model=cfg.tts_model or config.TTS_MODEL,
voice=voice,
speed=cfg.tts_speed,
),
)
@@ -61,6 +67,7 @@ def create_services(cfg: AssistantConfig):
logger.info(
f"创建服务: stt={cfg.asr or config.STT_MODEL} "
f"llm={cfg.model or config.LLM_MODEL} "
f"tts={cfg.voice or config.TTS_VOICE}"
f"tts={cfg.tts_model or config.TTS_MODEL} "
f"voice={cfg.voice or config.TTS_VOICE}"
)
return create_stt(cfg), create_llm(cfg), create_tts(cfg)