Merge pull request #4024 from pipecat-ai/mb/fix-lang-enum-stt-tts

This commit is contained in:
Mark Backman
2026-03-16 21:08:48 -04:00
committed by GitHub
25 changed files with 76 additions and 82 deletions

1
changelog/4024.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `Language` enum values (e.g. `Language.ES`) not being converted to service-specific codes when passed via `settings=Service.Settings(language=Language.ES)` at init time. This caused API errors (e.g. 400 from Rime) because the raw enum was sent instead of the expected language code (e.g. `"spa"`). Runtime updates via `UpdateSettingsFrame` were unaffected. The fix centralizes conversion in the base `TTSService` and `STTService` classes so all services handle this consistently.

View File

@@ -171,9 +171,7 @@ class AsyncAITTSService(WebsocketTTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
self.language_to_service_language(params.language) if params.language else None
)
default_settings.language = params.language
# 4. Apply settings delta (canonical API, always wins)
if settings is not None:
@@ -565,9 +563,7 @@ class AsyncAIHttpTTSService(TTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
self.language_to_service_language(params.language) if params.language else None
)
default_settings.language = params.language
# 4. Apply settings delta (canonical API, always wins)
if settings is not None:

View File

@@ -100,13 +100,13 @@ class AWSTranscribeSTTService(WebsocketSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model=None,
language=self.language_to_service_language(Language.EN),
language=Language.EN,
)
# 2. Apply direct init arg overrides (deprecated)
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = self.language_to_service_language(language)
default_settings.language = language
# 3. (No step 3, as there's no params object to apply)

View File

@@ -230,11 +230,7 @@ class AWSPollyTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.engine = params.engine
default_settings.language = (
self.language_to_service_language(params.language)
if params.language
else "en-US"
)
default_settings.language = params.language if params.language else "en-US"
default_settings.pitch = params.pitch
default_settings.rate = params.rate
default_settings.volume = params.volume

View File

@@ -106,13 +106,13 @@ class AzureSTTService(STTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model=None,
language=language_to_azure_language(Language.EN_US),
language=Language.EN_US,
)
# 2. Apply direct init arg overrides (deprecated)
if language is not None and language != Language.EN_US:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = language_to_azure_language(language)
default_settings.language = language
# 3. (No step 3, as there's no params object to apply)

View File

@@ -312,11 +312,7 @@ class AzureTTSService(TTSService, AzureBaseTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.emphasis = params.emphasis
default_settings.language = (
self.language_to_service_language(params.language)
if params.language
else "en-US"
)
default_settings.language = params.language if params.language else "en-US"
default_settings.pitch = params.pitch
default_settings.rate = params.rate
default_settings.role = params.role
@@ -809,11 +805,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.emphasis = params.emphasis
default_settings.language = (
self.language_to_service_language(params.language)
if params.language
else "en-US"
)
default_settings.language = params.language if params.language else "en-US"
default_settings.pitch = params.pitch
default_settings.rate = params.rate
default_settings.role = params.role

View File

@@ -260,9 +260,7 @@ class CambTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = (
self.language_to_service_language(params.language) or "en-us"
)
default_settings.language = params.language
if params.user_instructions is not None:
default_settings.user_instructions = params.user_instructions

View File

@@ -302,7 +302,7 @@ class CartesiaTTSService(WebsocketTTSService):
default_settings = self.Settings(
model="sonic-3",
voice=None,
language=language_to_cartesia_language(Language.EN),
language=Language.EN,
generation_config=None,
pronunciation_dict_id=None,
)
@@ -320,7 +320,7 @@ class CartesiaTTSService(WebsocketTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.generation_config is not None:
default_settings.generation_config = params.generation_config
if params.pronunciation_dict_id is not None:
@@ -749,7 +749,7 @@ class CartesiaHttpTTSService(TTSService):
default_settings = self.Settings(
model="sonic-3",
voice=None,
language=language_to_cartesia_language(Language.EN),
language=Language.EN,
generation_config=None,
pronunciation_dict_id=None,
)
@@ -767,7 +767,7 @@ class CartesiaHttpTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.generation_config is not None:
default_settings.generation_config = params.generation_config
if params.pronunciation_dict_id is not None:

View File

@@ -272,7 +272,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model="scribe_v2",
language=language_to_elevenlabs_language(Language.EN),
language=Language.EN,
tag_audio_events=None,
)
@@ -286,7 +286,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = language_to_elevenlabs_language(params.language)
default_settings.language = params.language
default_settings.tag_audio_events = params.tag_audio_events
# 4. Apply settings delta (canonical API, always wins)

View File

@@ -449,7 +449,7 @@ class ElevenLabsTTSService(WebsocketTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.stability is not None:
default_settings.stability = params.stability
if params.similarity_boost is not None:
@@ -1014,7 +1014,7 @@ class ElevenLabsHttpTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.optimize_streaming_latency is not None:
default_settings.optimize_streaming_latency = params.optimize_streaming_latency
if params.stability is not None:

View File

@@ -216,7 +216,7 @@ class FalSTTService(SegmentedSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model=None,
language=language_to_fal_language(Language.EN),
language=Language.EN,
)
# 2. (no deprecated direct args for this service)
@@ -226,7 +226,7 @@ class FalSTTService(SegmentedSTTService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = language_to_fal_language(params.language)
default_settings.language = params.language
if params.task != "transcribe":
task = params.task
if params.chunk_level != "segment":

View File

@@ -653,7 +653,7 @@ class GoogleHttpTTSService(TTSService):
if params.emphasis is not None:
default_settings.emphasis = params.emphasis
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.gender is not None:
default_settings.gender = params.gender
if params.google_style is not None:
@@ -1090,7 +1090,7 @@ class GoogleTTSService(GoogleBaseTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.speaking_rate is not None:
default_settings.speaking_rate = params.speaking_rate
@@ -1346,7 +1346,7 @@ class GeminiTTSService(GoogleBaseTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.prompt is not None:
default_settings.prompt = params.prompt
if params.multi_speaker is not None:

View File

@@ -85,7 +85,7 @@ class GroqSTTService(BaseWhisperSTTService):
# --- 1. Hardcoded defaults ---
default_settings = self.Settings(
model="whisper-large-v3-turbo",
language=self.language_to_service_language(Language.EN),
language=Language.EN,
prompt=None,
temperature=None,
)
@@ -96,7 +96,7 @@ class GroqSTTService(BaseWhisperSTTService):
default_settings.model = model
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = self.language_to_service_language(language)
default_settings.language = language
if prompt is not None:
self._warn_init_param_moved_to_settings("prompt", "prompt")
default_settings.prompt = prompt

View File

@@ -150,7 +150,7 @@ class KokoroTTSService(TTSService):
default_settings = self.Settings(
model=None,
voice=None,
language=language_to_kokoro_language(Language.EN),
language=Language.EN,
)
# 2. Apply direct init arg overrides (deprecated)
@@ -162,7 +162,7 @@ class KokoroTTSService(TTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = language_to_kokoro_language(params.language)
default_settings.language = params.language
# 4. Apply settings delta (canonical API, always wins)
if settings is not None:

View File

@@ -114,6 +114,10 @@ class LmntTTSService(InterruptibleTTSService):
sample_rate: Audio sample rate. If None, uses default.
language: Language for synthesis. Defaults to English.
.. deprecated:: 0.0.106
Use ``settings=LmntTTSService.Settings(language=...)`` instead.
output_format: Audio output format. One of "pcm_s16le", "pcm_f32le",
"mp3", "ulaw", "webm". Defaults to "pcm_s16le".
model: TTS model to use.
@@ -129,13 +133,16 @@ class LmntTTSService(InterruptibleTTSService):
default_settings = self.Settings(
model="aurora",
voice=None,
language=self.language_to_service_language(language),
language=Language.EN,
)
# 2. Apply direct init arg overrides (deprecated)
if voice_id is not None:
self._warn_init_param_moved_to_settings("voice_id", "voice")
default_settings.voice = voice_id
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = language
if model is not None:
self._warn_init_param_moved_to_settings("model", "model")
default_settings.model = model

View File

@@ -153,7 +153,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
default_settings = self.Settings(
model=None,
voice=None,
language=self.language_to_service_language(Language.EN),
language=Language.EN,
speed=1.0,
)
@@ -167,7 +167,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.speed is not None:
default_settings.speed = params.speed
@@ -487,7 +487,7 @@ class NeuphonicHttpTTSService(TTSService):
default_settings = self.Settings(
model=None,
voice=None,
language=self.language_to_service_language(Language.EN),
language=Language.EN,
speed=1.0,
)
@@ -501,7 +501,7 @@ class NeuphonicHttpTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
default_settings.language = params.language
if params.speed is not None:
default_settings.speed = params.speed

View File

@@ -503,7 +503,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model=model_function_map.get("model_name"),
language=language_to_nvidia_riva_language(Language.EN_US) or "en-US",
language=Language.EN_US,
profanity_filter=False,
automatic_punctuation=True,
verbatim_transcripts=False,
@@ -517,9 +517,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
language_to_nvidia_riva_language(params.language or Language.EN_US) or "en-US"
)
default_settings.language = params.language or Language.EN_US
default_settings.profanity_filter = params.profanity_filter
default_settings.automatic_punctuation = params.automatic_punctuation
default_settings.verbatim_transcripts = params.verbatim_transcripts

View File

@@ -119,7 +119,7 @@ class OpenAISTTService(BaseWhisperSTTService):
_language = language or Language.EN
default_settings = self.Settings(
model="gpt-4o-transcribe",
language=self.language_to_service_language(_language),
language=_language,
prompt=None,
temperature=None,
)

View File

@@ -251,9 +251,7 @@ class RimeTTSService(WebsocketTTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
self.language_to_service_language(params.language) if params.language else None
)
default_settings.language = params.language
default_settings.segment = params.segment
default_settings.speedAlpha = params.speed_alpha
# Arcana params
@@ -754,9 +752,7 @@ class RimeHttpTTSService(TTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
self.language_to_service_language(params.language) if params.language else "eng"
)
default_settings.language = params.language
default_settings.speedAlpha = params.speed_alpha
default_settings.reduceLatency = params.reduce_latency
default_settings.pauseBetweenBrackets = params.pause_between_brackets
@@ -984,9 +980,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService):
if params is not None:
self._warn_init_param_moved_to_settings("params")
if not settings:
default_settings.language = (
self.language_to_service_language(params.language) if params.language else None
)
default_settings.language = params.language
default_settings.segment = params.segment
default_settings.repetition_penalty = params.repetition_penalty
default_settings.temperature = params.temperature

View File

@@ -82,7 +82,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore
# --- 1. Hardcoded defaults ---
default_settings = self.Settings(
model="Whisper-Large-v3",
language=self.language_to_service_language(Language.EN),
language=Language.EN,
prompt=None,
temperature=None,
)
@@ -93,7 +93,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore
default_settings.model = model
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = self.language_to_service_language(language)
default_settings.language = language
if prompt is not None:
self._warn_init_param_moved_to_settings("prompt", "prompt")
default_settings.prompt = prompt

View File

@@ -473,9 +473,7 @@ class SarvamHttpTTSService(TTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = (
self.language_to_service_language(params.language) or "en-IN"
)
default_settings.language = params.language
if params.enable_preprocessing is not None:
default_settings.enable_preprocessing = params.enable_preprocessing
if params.pace is not None:
@@ -491,10 +489,6 @@ class SarvamHttpTTSService(TTSService):
if settings is not None:
default_settings.apply_update(settings)
# Convert Language enum to service-specific string
if isinstance(default_settings.language, Language):
default_settings.language = self.language_to_service_language(default_settings.language)
# Get model configuration (validates model exists)
resolved_model = default_settings.model
if resolved_model not in TTS_MODEL_CONFIGS:
@@ -889,9 +883,7 @@ class SarvamTTSService(InterruptibleTTSService):
self._warn_init_param_moved_to_settings("params")
if not settings:
if params.language is not None:
default_settings.language = (
self.language_to_service_language(params.language) or "en-IN"
)
default_settings.language = params.language
if params.enable_preprocessing is not None:
default_settings.enable_preprocessing = params.enable_preprocessing
if params.min_buffer_size is not None:
@@ -915,10 +907,6 @@ class SarvamTTSService(InterruptibleTTSService):
if settings is not None:
default_settings.apply_update(settings)
# Convert Language enum to service-specific string
if isinstance(default_settings.language, Language):
default_settings.language = self.language_to_service_language(default_settings.language)
# Get model configuration (validates model exists)
resolved_model = default_settings.model
if resolved_model not in TTS_MODEL_CONFIGS:

View File

@@ -120,6 +120,15 @@ class STTService(AIService):
or STTSettings(),
**kwargs,
)
# Convert Language enum to service-specific format at init time.
# Runtime updates are handled by _update_settings(), but init-time
# settings bypass that path and need explicit conversion. If the
# conversion returns None, the setting is left unchanged.
if isinstance(self._settings.language, Language):
converted = self.language_to_service_language(self._settings.language)
if converted is not None:
self._settings.language = converted
self._audio_passthrough = audio_passthrough
self._init_sample_rate = sample_rate
self._sample_rate = 0

View File

@@ -245,6 +245,14 @@ class TTSService(AIService):
**kwargs,
)
# Convert Language enum to service-specific format at init time.
# Runtime updates are handled by _update_settings(), but init-time
# settings bypass that path and need explicit conversion. If the
# conversion returns None, the setting is left unchanged.
if isinstance(self._settings.language, Language):
converted = self.language_to_service_language(self._settings.language)
if converted is not None:
self._settings.language = converted
# Resolve text_aggregation_mode from the new param or deprecated aggregate_sentences
if aggregate_sentences is not None:
import warnings

View File

@@ -194,7 +194,7 @@ class BaseWhisperSTTService(SegmentedSTTService):
default_settings.model = model
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = self.language_to_service_language(language)
default_settings.language = language
if prompt is not None:
self._warn_init_param_moved_to_settings("prompt", "prompt")
default_settings.prompt = prompt

View File

@@ -108,6 +108,10 @@ class XTTSService(TTSService):
base_url: Base URL of the XTTS streaming server.
aiohttp_session: HTTP session for making requests to the server.
language: Language for synthesis. Defaults to English.
.. deprecated:: 0.0.106
Use ``settings=XTTSService.Settings(language=...)`` instead.
sample_rate: Audio sample rate. If None, uses default.
settings: Runtime-updatable settings. When provided alongside deprecated
parameters, ``settings`` values take precedence.
@@ -117,13 +121,16 @@ class XTTSService(TTSService):
default_settings = self.Settings(
model=None,
voice=None,
language=self.language_to_service_language(language),
language=Language.EN,
)
# 2. Apply direct init arg overrides (deprecated)
if voice_id is not None:
self._warn_init_param_moved_to_settings("voice_id", "voice")
default_settings.voice = voice_id
if language is not None:
self._warn_init_param_moved_to_settings("language", "language")
default_settings.language = language
# 3. (No step 3, as there's no params object to apply)