diff --git a/backend/models.py b/backend/models.py index cca7462..347b515 100644 --- a/backend/models.py +++ b/backend/models.py @@ -26,6 +26,7 @@ class AssistantConfig(BaseModel): # 模型/音色选项 model: str = "" # LLM asr: str = "" # STT + tts_model: str = "" voice: str = "" # TTS 音色 stt_language: str = "" tts_speed: float = 1.0 diff --git a/backend/services/config_resolver.py b/backend/services/config_resolver.py index 4314811..f740dc6 100644 --- a/backend/services/config_resolver.py +++ b/backend/services/config_resolver.py @@ -58,6 +58,7 @@ async def resolve_runtime_config( # 模型/音色:凭证的模型ID优先 model=(llm.model_id if llm else ""), asr=(stt.model_id if stt else ""), + tts_model=(tts.model_id if tts else ""), voice=(tts.voice if tts else ""), stt_language=(stt.language if stt else ""), tts_speed=(tts.speed if tts else 1.0), diff --git a/backend/services/credential_tester.py b/backend/services/credential_tester.py index ccfe854..fc32301 100644 --- a/backend/services/credential_tester.py +++ b/backend/services/credential_tester.py @@ -84,6 +84,7 @@ async def test_openai_credential( "model": config.model_id, "input": "测试", "voice": config.voice, + "response_format": "pcm", "speed": config.speed, }, ) diff --git a/backend/services/pipecat/service_factory.py b/backend/services/pipecat/service_factory.py index 627efa0..5839f93 100644 --- a/backend/services/pipecat/service_factory.py +++ b/backend/services/pipecat/service_factory.py @@ -10,7 +10,7 @@ from models import AssistantConfig from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.openai.stt import OpenAISTTService -from pipecat.services.openai.tts import OpenAITTSService +from pipecat.services.openai.tts import VALID_VOICES, OpenAITTSService from pipecat.transcriptions.language import Language @@ -48,12 +48,18 @@ def create_llm(cfg: AssistantConfig): def create_tts(cfg: AssistantConfig): """CosyVoice 等,走 OpenAI 兼容的 /v1/audio/speech。""" + voice = cfg.voice or config.TTS_VOICE + # Pipecat 默认只接受 OpenAI 官方音色。OpenAI 兼容服务常使用自定义 voice id, + # 注册为原样映射后仍由 OpenAI SDK 按字符串透传给供应商。 + VALID_VOICES.setdefault(voice, voice) return OpenAITTSService( api_key=cfg.tts_api_key or config.TTS_API_KEY, base_url=cfg.tts_base_url or config.TTS_BASE_URL, - model=config.TTS_MODEL, - voice=cfg.voice or config.TTS_VOICE, - speed=cfg.tts_speed, + settings=OpenAITTSService.Settings( + model=cfg.tts_model or config.TTS_MODEL, + voice=voice, + speed=cfg.tts_speed, + ), ) @@ -61,6 +67,7 @@ def create_services(cfg: AssistantConfig): logger.info( f"创建服务: stt={cfg.asr or config.STT_MODEL} " f"llm={cfg.model or config.LLM_MODEL} " - f"tts={cfg.voice or config.TTS_VOICE}" + f"tts={cfg.tts_model or config.TTS_MODEL} " + f"voice={cfg.voice or config.TTS_VOICE}" ) return create_stt(cfg), create_llm(cfg), create_tts(cfg)