From 2b4bf57c04e9698469c139d4965a5a0a1fc4303a Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 30 Jan 2025 09:51:25 -0500 Subject: [PATCH] Improve ElevenLabs language checking logic --- CHANGELOG.md | 5 +++++ src/pipecat/services/elevenlabs.py | 28 +++++++++++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3408e6f..03f578dd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Improved the language checking logic in `ElevenLabsTTSService` and + `ElevenLabsHttpTTSService` to properly handle language codes based on model + compatibility, with appropriate warnings when language codes cannot be + applied. + - Updated `GoogleLLMContext` to support pushing `LLMMessagesUpdateFrame`s that contain a combination of function calls, function call responses, system messages, or just messages. diff --git a/src/pipecat/services/elevenlabs.py b/src/pipecat/services/elevenlabs.py index 546ec1edd..d37fb499b 100644 --- a/src/pipecat/services/elevenlabs.py +++ b/src/pipecat/services/elevenlabs.py @@ -45,7 +45,10 @@ except ModuleNotFoundError as e: ElevenLabsOutputFormat = Literal["pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100"] # Models that support language codes -# eleven_multilingual_v2 doesn't support language codes, so it's excluded +# The following models are excluded as they don't support language codes: +# - eleven_flash_v2 +# - eleven_turbo_v2 +# - eleven_multilingual_v2 ELEVENLABS_MULTILINGUAL_MODELS = { "eleven_flash_v2_5", "eleven_turbo_v2_5", @@ -137,7 +140,7 @@ def calculate_word_times( class ElevenLabsTTSService(WordTTSService, WebsocketService): class InputParams(BaseModel): - language: Optional[Language] = Language.EN + language: Optional[Language] = None optimize_streaming_latency: Optional[str] = None stability: Optional[float] = None similarity_boost: Optional[float] = None @@ -197,7 +200,7 @@ class ElevenLabsTTSService(WordTTSService, WebsocketService): "sample_rate": sample_rate_from_output_format(output_format), "language": self.language_to_service_language(params.language) if params.language - else "en", + else None, "output_format": output_format, "optimize_streaming_latency": params.optimize_streaming_latency, "stability": params.stability, @@ -327,9 +330,10 @@ class ElevenLabsTTSService(WordTTSService, WebsocketService): # Language can only be used with the ELEVENLABS_MULTILINGUAL_MODELS language = self._settings["language"] - if model in ELEVENLABS_MULTILINGUAL_MODELS: + if model in ELEVENLABS_MULTILINGUAL_MODELS and language is not None: url += f"&language_code={language}" - else: + logger.debug(f"Using language code: {language}") + elif language is not None: logger.warning( f"Language code [{language}] not applied. Language codes can only be used with multilingual models: {', '.join(sorted(ELEVENLABS_MULTILINGUAL_MODELS))}" ) @@ -429,7 +433,7 @@ class ElevenLabsHttpTTSService(TTSService): """ class InputParams(BaseModel): - language: Optional[Language] = Language.EN + language: Optional[Language] = None optimize_streaming_latency: Optional[int] = None stability: Optional[float] = None similarity_boost: Optional[float] = None @@ -460,7 +464,7 @@ class ElevenLabsHttpTTSService(TTSService): "sample_rate": sample_rate_from_output_format(output_format), "language": self.language_to_service_language(params.language) if params.language - else "en", + else None, "output_format": output_format, "optimize_streaming_latency": params.optimize_streaming_latency, "stability": params.stability, @@ -525,8 +529,14 @@ class ElevenLabsHttpTTSService(TTSService): if self._voice_settings: payload["voice_settings"] = self._voice_settings - if self._settings["language"]: - payload["language_code"] = self._settings["language"] + language = self._settings["language"] + if self._model_name in ELEVENLABS_MULTILINGUAL_MODELS and language: + payload["language_code"] = language + logger.debug(f"Using language code: {language}") + elif language: + logger.warning( + f"Language code [{language}] not applied. Language codes can only be used with multilingual models: {', '.join(sorted(ELEVENLABS_MULTILINGUAL_MODELS))}" + ) headers = { "xi-api-key": self._api_key,