diff --git a/changelog/4024.fixed.md b/changelog/4024.fixed.md new file mode 100644 index 000000000..7654cbdf5 --- /dev/null +++ b/changelog/4024.fixed.md @@ -0,0 +1 @@ +- Fixed `Language` enum values (e.g. `Language.ES`) not being converted to service-specific codes when passed via `settings=Service.Settings(language=Language.ES)` at init time. This caused API errors (e.g. 400 from Rime) because the raw enum was sent instead of the expected language code (e.g. `"spa"`). Runtime updates via `UpdateSettingsFrame` were unaffected. The fix centralizes conversion in the base `TTSService` and `STTService` classes so all services handle this consistently. diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 42c0a09a4..d2ac74445 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -171,9 +171,7 @@ class AsyncAITTSService(WebsocketTTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - self.language_to_service_language(params.language) if params.language else None - ) + default_settings.language = params.language # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -565,9 +563,7 @@ class AsyncAIHttpTTSService(TTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - self.language_to_service_language(params.language) if params.language else None - ) + default_settings.language = params.language # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index eed1a321d..ace05090d 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -100,13 +100,13 @@ class AWSTranscribeSTTService(WebsocketSTTService): # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model=None, - language=self.language_to_service_language(Language.EN), + language=Language.EN, ) # 2. Apply direct init arg overrides (deprecated) if language is not None: self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = self.language_to_service_language(language) + default_settings.language = language # 3. (No step 3, as there's no params object to apply) diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 9648c45c8..32266886e 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -230,11 +230,7 @@ class AWSPollyTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: default_settings.engine = params.engine - default_settings.language = ( - self.language_to_service_language(params.language) - if params.language - else "en-US" - ) + default_settings.language = params.language if params.language else "en-US" default_settings.pitch = params.pitch default_settings.rate = params.rate default_settings.volume = params.volume diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 857b166cb..57306e06a 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -106,13 +106,13 @@ class AzureSTTService(STTService): # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model=None, - language=language_to_azure_language(Language.EN_US), + language=Language.EN_US, ) # 2. Apply direct init arg overrides (deprecated) if language is not None and language != Language.EN_US: self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = language_to_azure_language(language) + default_settings.language = language # 3. (No step 3, as there's no params object to apply) diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 0130ef5cb..a9491e9aa 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -312,11 +312,7 @@ class AzureTTSService(TTSService, AzureBaseTTSService): self._warn_init_param_moved_to_settings("params") if not settings: default_settings.emphasis = params.emphasis - default_settings.language = ( - self.language_to_service_language(params.language) - if params.language - else "en-US" - ) + default_settings.language = params.language if params.language else "en-US" default_settings.pitch = params.pitch default_settings.rate = params.rate default_settings.role = params.role @@ -809,11 +805,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): self._warn_init_param_moved_to_settings("params") if not settings: default_settings.emphasis = params.emphasis - default_settings.language = ( - self.language_to_service_language(params.language) - if params.language - else "en-US" - ) + default_settings.language = params.language if params.language else "en-US" default_settings.pitch = params.pitch default_settings.rate = params.rate default_settings.role = params.role diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 9c360ed37..b6b83a928 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -260,9 +260,7 @@ class CambTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = ( - self.language_to_service_language(params.language) or "en-us" - ) + default_settings.language = params.language if params.user_instructions is not None: default_settings.user_instructions = params.user_instructions diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index aca5c46c6..b713a0d9a 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -302,7 +302,7 @@ class CartesiaTTSService(WebsocketTTSService): default_settings = self.Settings( model="sonic-3", voice=None, - language=language_to_cartesia_language(Language.EN), + language=Language.EN, generation_config=None, pronunciation_dict_id=None, ) @@ -320,7 +320,7 @@ class CartesiaTTSService(WebsocketTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.generation_config is not None: default_settings.generation_config = params.generation_config if params.pronunciation_dict_id is not None: @@ -749,7 +749,7 @@ class CartesiaHttpTTSService(TTSService): default_settings = self.Settings( model="sonic-3", voice=None, - language=language_to_cartesia_language(Language.EN), + language=Language.EN, generation_config=None, pronunciation_dict_id=None, ) @@ -767,7 +767,7 @@ class CartesiaHttpTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.generation_config is not None: default_settings.generation_config = params.generation_config if params.pronunciation_dict_id is not None: diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index daca9be3d..aa7fd0659 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -272,7 +272,7 @@ class ElevenLabsSTTService(SegmentedSTTService): # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model="scribe_v2", - language=language_to_elevenlabs_language(Language.EN), + language=Language.EN, tag_audio_events=None, ) @@ -286,7 +286,7 @@ class ElevenLabsSTTService(SegmentedSTTService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = language_to_elevenlabs_language(params.language) + default_settings.language = params.language default_settings.tag_audio_events = params.tag_audio_events # 4. Apply settings delta (canonical API, always wins) diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 8cfd1abe2..866d0405f 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -449,7 +449,7 @@ class ElevenLabsTTSService(WebsocketTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.stability is not None: default_settings.stability = params.stability if params.similarity_boost is not None: @@ -1014,7 +1014,7 @@ class ElevenLabsHttpTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.optimize_streaming_latency is not None: default_settings.optimize_streaming_latency = params.optimize_streaming_latency if params.stability is not None: diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 7bfcfbcfd..65df7e3ab 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -216,7 +216,7 @@ class FalSTTService(SegmentedSTTService): # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model=None, - language=language_to_fal_language(Language.EN), + language=Language.EN, ) # 2. (no deprecated direct args for this service) @@ -226,7 +226,7 @@ class FalSTTService(SegmentedSTTService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = language_to_fal_language(params.language) + default_settings.language = params.language if params.task != "transcribe": task = params.task if params.chunk_level != "segment": diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 3455a8125..93053cc94 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -653,7 +653,7 @@ class GoogleHttpTTSService(TTSService): if params.emphasis is not None: default_settings.emphasis = params.emphasis if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.gender is not None: default_settings.gender = params.gender if params.google_style is not None: @@ -1090,7 +1090,7 @@ class GoogleTTSService(GoogleBaseTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.speaking_rate is not None: default_settings.speaking_rate = params.speaking_rate @@ -1346,7 +1346,7 @@ class GeminiTTSService(GoogleBaseTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.prompt is not None: default_settings.prompt = params.prompt if params.multi_speaker is not None: diff --git a/src/pipecat/services/groq/stt.py b/src/pipecat/services/groq/stt.py index 6ff4075ae..3f6c23774 100644 --- a/src/pipecat/services/groq/stt.py +++ b/src/pipecat/services/groq/stt.py @@ -85,7 +85,7 @@ class GroqSTTService(BaseWhisperSTTService): # --- 1. Hardcoded defaults --- default_settings = self.Settings( model="whisper-large-v3-turbo", - language=self.language_to_service_language(Language.EN), + language=Language.EN, prompt=None, temperature=None, ) @@ -96,7 +96,7 @@ class GroqSTTService(BaseWhisperSTTService): default_settings.model = model if language is not None: self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = self.language_to_service_language(language) + default_settings.language = language if prompt is not None: self._warn_init_param_moved_to_settings("prompt", "prompt") default_settings.prompt = prompt diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 34e703dab..4756d4e74 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -150,7 +150,7 @@ class KokoroTTSService(TTSService): default_settings = self.Settings( model=None, voice=None, - language=language_to_kokoro_language(Language.EN), + language=Language.EN, ) # 2. Apply direct init arg overrides (deprecated) @@ -162,7 +162,7 @@ class KokoroTTSService(TTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = language_to_kokoro_language(params.language) + default_settings.language = params.language # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 0ca91a107..29d0b60ca 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -114,6 +114,10 @@ class LmntTTSService(InterruptibleTTSService): sample_rate: Audio sample rate. If None, uses default. language: Language for synthesis. Defaults to English. + + .. deprecated:: 0.0.106 + Use ``settings=LmntTTSService.Settings(language=...)`` instead. + output_format: Audio output format. One of "pcm_s16le", "pcm_f32le", "mp3", "ulaw", "webm". Defaults to "pcm_s16le". model: TTS model to use. @@ -129,13 +133,16 @@ class LmntTTSService(InterruptibleTTSService): default_settings = self.Settings( model="aurora", voice=None, - language=self.language_to_service_language(language), + language=Language.EN, ) # 2. Apply direct init arg overrides (deprecated) if voice_id is not None: self._warn_init_param_moved_to_settings("voice_id", "voice") default_settings.voice = voice_id + if language is not None: + self._warn_init_param_moved_to_settings("language", "language") + default_settings.language = language if model is not None: self._warn_init_param_moved_to_settings("model", "model") default_settings.model = model diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index b345a0ff4..6fb9d3e28 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -153,7 +153,7 @@ class NeuphonicTTSService(InterruptibleTTSService): default_settings = self.Settings( model=None, voice=None, - language=self.language_to_service_language(Language.EN), + language=Language.EN, speed=1.0, ) @@ -167,7 +167,7 @@ class NeuphonicTTSService(InterruptibleTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.speed is not None: default_settings.speed = params.speed @@ -487,7 +487,7 @@ class NeuphonicHttpTTSService(TTSService): default_settings = self.Settings( model=None, voice=None, - language=self.language_to_service_language(Language.EN), + language=Language.EN, speed=1.0, ) @@ -501,7 +501,7 @@ class NeuphonicHttpTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = self.language_to_service_language(params.language) + default_settings.language = params.language if params.speed is not None: default_settings.speed = params.speed diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index fcca14741..50a654191 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -503,7 +503,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model=model_function_map.get("model_name"), - language=language_to_nvidia_riva_language(Language.EN_US) or "en-US", + language=Language.EN_US, profanity_filter=False, automatic_punctuation=True, verbatim_transcripts=False, @@ -517,9 +517,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - language_to_nvidia_riva_language(params.language or Language.EN_US) or "en-US" - ) + default_settings.language = params.language or Language.EN_US default_settings.profanity_filter = params.profanity_filter default_settings.automatic_punctuation = params.automatic_punctuation default_settings.verbatim_transcripts = params.verbatim_transcripts diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 39ca60b25..68ab7e067 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -119,7 +119,7 @@ class OpenAISTTService(BaseWhisperSTTService): _language = language or Language.EN default_settings = self.Settings( model="gpt-4o-transcribe", - language=self.language_to_service_language(_language), + language=_language, prompt=None, temperature=None, ) diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 24a5d152b..aec5630b6 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -251,9 +251,7 @@ class RimeTTSService(WebsocketTTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - self.language_to_service_language(params.language) if params.language else None - ) + default_settings.language = params.language default_settings.segment = params.segment default_settings.speedAlpha = params.speed_alpha # Arcana params @@ -754,9 +752,7 @@ class RimeHttpTTSService(TTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - self.language_to_service_language(params.language) if params.language else "eng" - ) + default_settings.language = params.language default_settings.speedAlpha = params.speed_alpha default_settings.reduceLatency = params.reduce_latency default_settings.pauseBetweenBrackets = params.pause_between_brackets @@ -984,9 +980,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: - default_settings.language = ( - self.language_to_service_language(params.language) if params.language else None - ) + default_settings.language = params.language default_settings.segment = params.segment default_settings.repetition_penalty = params.repetition_penalty default_settings.temperature = params.temperature diff --git a/src/pipecat/services/sambanova/stt.py b/src/pipecat/services/sambanova/stt.py index d3a77b4eb..5cf12d771 100644 --- a/src/pipecat/services/sambanova/stt.py +++ b/src/pipecat/services/sambanova/stt.py @@ -82,7 +82,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore # --- 1. Hardcoded defaults --- default_settings = self.Settings( model="Whisper-Large-v3", - language=self.language_to_service_language(Language.EN), + language=Language.EN, prompt=None, temperature=None, ) @@ -93,7 +93,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore default_settings.model = model if language is not None: self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = self.language_to_service_language(language) + default_settings.language = language if prompt is not None: self._warn_init_param_moved_to_settings("prompt", "prompt") default_settings.prompt = prompt diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 91ee088cf..c926270de 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -473,9 +473,7 @@ class SarvamHttpTTSService(TTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = ( - self.language_to_service_language(params.language) or "en-IN" - ) + default_settings.language = params.language if params.enable_preprocessing is not None: default_settings.enable_preprocessing = params.enable_preprocessing if params.pace is not None: @@ -491,10 +489,6 @@ class SarvamHttpTTSService(TTSService): if settings is not None: default_settings.apply_update(settings) - # Convert Language enum to service-specific string - if isinstance(default_settings.language, Language): - default_settings.language = self.language_to_service_language(default_settings.language) - # Get model configuration (validates model exists) resolved_model = default_settings.model if resolved_model not in TTS_MODEL_CONFIGS: @@ -889,9 +883,7 @@ class SarvamTTSService(InterruptibleTTSService): self._warn_init_param_moved_to_settings("params") if not settings: if params.language is not None: - default_settings.language = ( - self.language_to_service_language(params.language) or "en-IN" - ) + default_settings.language = params.language if params.enable_preprocessing is not None: default_settings.enable_preprocessing = params.enable_preprocessing if params.min_buffer_size is not None: @@ -915,10 +907,6 @@ class SarvamTTSService(InterruptibleTTSService): if settings is not None: default_settings.apply_update(settings) - # Convert Language enum to service-specific string - if isinstance(default_settings.language, Language): - default_settings.language = self.language_to_service_language(default_settings.language) - # Get model configuration (validates model exists) resolved_model = default_settings.model if resolved_model not in TTS_MODEL_CONFIGS: diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index a16aa0eaa..c442c41eb 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -120,6 +120,15 @@ class STTService(AIService): or STTSettings(), **kwargs, ) + + # Convert Language enum to service-specific format at init time. + # Runtime updates are handled by _update_settings(), but init-time + # settings bypass that path and need explicit conversion. + if isinstance(self._settings.language, Language): + converted = self.language_to_service_language(self._settings.language) + if converted is not None: + self._settings.language = converted + self._audio_passthrough = audio_passthrough self._init_sample_rate = sample_rate self._sample_rate = 0 diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 44fb98826..e2a9190ab 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -245,6 +245,14 @@ class TTSService(AIService): **kwargs, ) + # Convert Language enum to service-specific format at init time. + # Runtime updates are handled by _update_settings(), but init-time + # settings bypass that path and need explicit conversion. + if isinstance(self._settings.language, Language): + converted = self.language_to_service_language(self._settings.language) + if converted is not None: + self._settings.language = converted + # Resolve text_aggregation_mode from the new param or deprecated aggregate_sentences if aggregate_sentences is not None: import warnings diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 93bb5de34..33d19b0aa 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -194,7 +194,7 @@ class BaseWhisperSTTService(SegmentedSTTService): default_settings.model = model if language is not None: self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = self.language_to_service_language(language) + default_settings.language = language if prompt is not None: self._warn_init_param_moved_to_settings("prompt", "prompt") default_settings.prompt = prompt diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 099fce65c..b164f8945 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -108,6 +108,10 @@ class XTTSService(TTSService): base_url: Base URL of the XTTS streaming server. aiohttp_session: HTTP session for making requests to the server. language: Language for synthesis. Defaults to English. + + .. deprecated:: 0.0.106 + Use ``settings=XTTSService.Settings(language=...)`` instead. + sample_rate: Audio sample rate. If None, uses default. settings: Runtime-updatable settings. When provided alongside deprecated parameters, ``settings`` values take precedence. @@ -117,13 +121,16 @@ class XTTSService(TTSService): default_settings = self.Settings( model=None, voice=None, - language=self.language_to_service_language(language), + language=Language.EN, ) # 2. Apply direct init arg overrides (deprecated) if voice_id is not None: self._warn_init_param_moved_to_settings("voice_id", "voice") default_settings.voice = voice_id + if language is not None: + self._warn_init_param_moved_to_settings("language", "language") + default_settings.language = language # 3. (No step 3, as there's no params object to apply)