diff --git a/changelog/3946.added.md b/changelog/3946.added.md new file mode 100644 index 000000000..6aabfefb2 --- /dev/null +++ b/changelog/3946.added.md @@ -0,0 +1 @@ +- Runtime settings updates (via `STTUpdateSettingsFrame`) now work for AWS Transcribe, Azure, Cartesia, Deepgram, ElevenLabs Realtime, Gradium, and Soniox STT services. Previously, changing settings at runtime only stored the new values without reconnecting. diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index ca3b0140c..7355bb1b7 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -107,7 +107,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): _warn_deprecated_param("language", AWSTranscribeSTTSettings, "language") default_settings.language = self.language_to_service_language(language) - # 3. No params to apply + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -161,7 +161,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): """Apply a settings delta and reconnect if anything changed.""" changed = await super()._update_settings(delta) - if changed: + if changed and self._websocket: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index f66cfeb42..f940ea9c0 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -112,7 +112,7 @@ class AzureSTTService(STTService): _warn_deprecated_param("language", AzureSTTSettings, "language") default_settings.language = language_to_azure_language(language) - # 3. No params to apply + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -166,8 +166,9 @@ class AzureSTTService(STTService): self._speech_config.speech_recognition_language = ( self._settings.language or language_to_azure_language(Language.EN_US) ) - await self._disconnect() - await self._connect() + if self._audio_stream: + await self._disconnect() + await self._connect() return changed diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 17570911a..14020631b 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -365,7 +365,9 @@ class DeepgramSTTService(STTService): vad_events=False, ) - # 2. Apply live_options overrides — only if settings not provided + # 2. (No step 2, as there are no deprecated direct args) + + # 3. Apply live_options overrides — only if settings not provided if live_options is not None: _warn_deprecated_param("live_options", DeepgramSTTSettings) if not settings: @@ -402,7 +404,7 @@ class DeepgramSTTService(STTService): delta = DeepgramSTTSettings.from_mapping(lo_dict) default_settings.apply_update(delta) - # 3. Apply settings delta (canonical API, always wins) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) @@ -494,8 +496,9 @@ class DeepgramSTTService(STTService): if isinstance(self._settings, DeepgramSTTSettings): self._settings._sync_extra_to_fields() - await self._disconnect() - await self._connect() + if self._connection: + await self._disconnect() + await self._connect() return changed diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 7749fc370..5d6e5ffdc 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -110,6 +110,8 @@ class DeepgramTTSService(WebsocketTTSService): default_settings.model = voice default_settings.voice = voice + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) @@ -423,6 +425,8 @@ class DeepgramHttpTTSService(TTSService): default_settings.model = voice default_settings.voice = voice + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 1802bb095..dff501d22 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -605,8 +605,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): if not changed: return changed - await self._disconnect() - await self._connect() + if self._websocket: + await self._disconnect() + await self._connect() return changed diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 05a6bb8b5..814d478e4 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -12,7 +12,7 @@ WebSocket API for streaming audio transcription. import base64 import json -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import STTSettings, _warn_deprecated_param +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, _warn_deprecated_param from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -77,7 +77,7 @@ class GradiumSTTSettings(STTSettings): Default is 10 (800ms). Lower values like 7-8 give faster response. """ - delay_in_frames: Optional[int] = None + delay_in_frames: Optional[int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GradiumSTTService(WebsocketSTTService): @@ -157,7 +157,9 @@ class GradiumSTTService(WebsocketSTTService): delay_in_frames=None, ) - # 2. Apply params overrides — only if settings not provided + # 2. (No step 2, as there are no deprecated direct args) + + # 3. Apply params overrides — only if settings not provided if params is not None: _warn_deprecated_param("params", GradiumSTTSettings) if not settings: @@ -165,7 +167,7 @@ class GradiumSTTService(WebsocketSTTService): if params.delay_in_frames is not None: default_settings.delay_in_frames = params.delay_in_frames - # 3. Apply settings delta (canonical API, always wins) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) @@ -213,8 +215,9 @@ class GradiumSTTService(WebsocketSTTService): if not changed: return changed - await self._disconnect() - await self._connect() + if self._websocket: + await self._disconnect() + await self._connect() return changed async def start(self, frame: StartFrame): diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 3cc6778af..f6bf46649 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -139,7 +139,7 @@ class LmntTTSService(InterruptibleTTSService): _warn_deprecated_param("model", LmntTTSSettings, "model") default_settings.model = model - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index 337dcd00c..fb7b627cd 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -82,7 +82,7 @@ class PiperTTSService(TTSService): _warn_deprecated_param("voice_id", PiperTTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -232,7 +232,7 @@ class PiperHttpTTSService(TTSService): _warn_deprecated_param("voice_id", PiperHttpTTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index f0a93a182..9713cea44 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -94,7 +94,7 @@ class ResembleAITTSService(WebsocketTTSService): _warn_deprecated_param("voice_id", ResembleAITTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 4da8db72a..78ac6dfb8 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -124,6 +124,8 @@ class XTTSService(TTSService): _warn_deprecated_param("voice_id", XTTSTTSSettings, "voice") default_settings.voice = voice_id + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) diff --git a/tests/test_service_init.py b/tests/test_service_init.py index 67dcbb324..377300f64 100644 --- a/tests/test_service_init.py +++ b/tests/test_service_init.py @@ -34,7 +34,6 @@ new services are covered automatically with no per-service maintenance. import importlib import inspect import pkgutil -import warnings from dataclasses import fields import pytest