diff --git a/changelog/3946.added.md b/changelog/3946.added.md new file mode 100644 index 000000000..6aabfefb2 --- /dev/null +++ b/changelog/3946.added.md @@ -0,0 +1 @@ +- Runtime settings updates (via `STTUpdateSettingsFrame`) now work for AWS Transcribe, Azure, Cartesia, Deepgram, ElevenLabs Realtime, Gradium, and Soniox STT services. Previously, changing settings at runtime only stored the new values without reconnecting. diff --git a/examples/foundational/07i-interruptible-xtts.py b/examples/foundational/07i-interruptible-xtts.py index 40845fac0..c5bb61419 100644 --- a/examples/foundational/07i-interruptible-xtts.py +++ b/examples/foundational/07i-interruptible-xtts.py @@ -25,7 +25,7 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService, OpenAILLMSettings -from pipecat.services.xtts.tts import XTTSService, XTTSSettings +from pipecat.services.xtts.tts import XTTSService, XTTSTTSSettings from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -59,7 +59,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = XTTSService( aiohttp_session=session, - settings=XTTSSettings( + settings=XTTSTTSSettings( voice="Claribel Dervla", ), base_url="http://localhost:8000", diff --git a/examples/foundational/07m-interruptible-aws-strands.py b/examples/foundational/07m-interruptible-aws-strands.py index c65709a7b..e5c6f6f01 100644 --- a/examples/foundational/07m-interruptible-aws-strands.py +++ b/examples/foundational/07m-interruptible-aws-strands.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # Create Strands agent processor try: - agent = build_agent(model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", max_tokens=8000) + agent = build_agent(model_id="us.anthropic.claude-sonnet-4-6", max_tokens=8000) llm = StrandsAgentsProcessor(agent=agent) logger.info("Successfully created Strands agent for NAB customer service coaching") except Exception as e: @@ -152,7 +152,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages=[ { "role": "user", - "content": f"Greet the user and introduce yourself.", + "content": f"Greet the user and introduce yourself. Don't use emojis.", } ], run_llm=True, diff --git a/examples/foundational/07m-interruptible-aws.py b/examples/foundational/07m-interruptible-aws.py index 9cea445c9..8ddec82ef 100644 --- a/examples/foundational/07m-interruptible-aws.py +++ b/examples/foundational/07m-interruptible-aws.py @@ -64,7 +64,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = AWSBedrockLLMService( aws_region="us-west-2", settings=AWSBedrockLLMSettings( - model="us.anthropic.claude-haiku-4-5-20251001-v1:0", + model="us.anthropic.claude-sonnet-4-6", temperature=0.8, system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", ), diff --git a/examples/foundational/07n-interruptible-gemini-image.py b/examples/foundational/07n-interruptible-gemini-image.py index 96c7cfd47..8369f1de6 100644 --- a/examples/foundational/07n-interruptible-gemini-image.py +++ b/examples/foundational/07n-interruptible-gemini-image.py @@ -72,7 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = GoogleSTTService( credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), settings=GoogleSTTSettings( - languages=Language.EN_US, + languages=[Language.EN_US], ), ) diff --git a/examples/foundational/07n-interruptible-gemini.py b/examples/foundational/07n-interruptible-gemini.py index a1d58a1cd..92220bce5 100644 --- a/examples/foundational/07n-interruptible-gemini.py +++ b/examples/foundational/07n-interruptible-gemini.py @@ -55,7 +55,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = GoogleSTTService( settings=GoogleSTTSettings( - languages=Language.EN_US, + languages=[Language.EN_US], ), credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), ) diff --git a/examples/foundational/07n-interruptible-google-http.py b/examples/foundational/07n-interruptible-google-http.py index a607653f7..b137bcd7f 100644 --- a/examples/foundational/07n-interruptible-google-http.py +++ b/examples/foundational/07n-interruptible-google-http.py @@ -55,8 +55,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = GoogleSTTService( settings=GoogleSTTSettings( - languages=Language.EN_US, - model="chirp_3", + languages=[Language.EN_US], + # Add model to use a specific model + # model="chirp_3", ), credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), location="us", diff --git a/examples/foundational/07t-interruptible-fish.py b/examples/foundational/07t-interruptible-fish.py index e7aeb993b..caeab20c6 100644 --- a/examples/foundational/07t-interruptible-fish.py +++ b/examples/foundational/07t-interruptible-fish.py @@ -58,7 +58,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = FishAudioTTSService( api_key=os.getenv("FISH_API_KEY"), settings=FishAudioTTSSettings( - model="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama + voice="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama ), ) diff --git a/examples/foundational/07z-interruptible-sarvam-http.py b/examples/foundational/07z-interruptible-sarvam-http.py index 4a0baa65c..ac6ece100 100644 --- a/examples/foundational/07z-interruptible-sarvam-http.py +++ b/examples/foundational/07z-interruptible-sarvam-http.py @@ -24,7 +24,7 @@ from pipecat.processors.aggregators.llm_response_universal import ( from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.openai.llm import OpenAILLMService, OpenAILLMSettings -from pipecat.services.sarvam.stt import SarvamSTTService +from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings from pipecat.services.sarvam.tts import SarvamHttpTTSService, SarvamHttpTTSSettings from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams @@ -59,14 +59,16 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): async with aiohttp.ClientSession() as session: stt = SarvamSTTService( api_key=os.getenv("SARVAM_API_KEY"), - model="saarika:v2.5", + settings=SarvamSTTSettings( + model="saarika:v2.5", + ), ) tts = SarvamHttpTTSService( api_key=os.getenv("SARVAM_API_KEY"), aiohttp_session=session, settings=SarvamHttpTTSSettings( - language=Language.EN, + language=Language.EN_IN, ), ) diff --git a/examples/foundational/12b-describe-image-aws.py b/examples/foundational/12b-describe-image-aws.py index eaa5e3d19..86ec2e66d 100644 --- a/examples/foundational/12b-describe-image-aws.py +++ b/examples/foundational/12b-describe-image-aws.py @@ -61,11 +61,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = AWSBedrockLLMService( aws_region="us-west-2", settings=AWSBedrockLLMSettings( - model="us.anthropic.claude-3-7-sonnet-20250219-v1:0", - # Note: usually, prefer providing latency="optimized" param. - # Here we can't because AWS Bedrock doesn't support it for Claude 3.7, - # which we need for image input. - params=AWSBedrockLLMService.InputParams(temperature=0.8), + model="us.anthropic.claude-sonnet-4-6", + temperature=0.8, system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way. You are also able to describe images.", ), ) diff --git a/examples/foundational/14r-function-calling-aws.py b/examples/foundational/14r-function-calling-aws.py index 3693cba6c..2414e06f0 100644 --- a/examples/foundational/14r-function-calling-aws.py +++ b/examples/foundational/14r-function-calling-aws.py @@ -76,7 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = AWSBedrockLLMService( aws_region="us-west-2", settings=AWSBedrockLLMSettings( - model="us.anthropic.claude-haiku-4-5-20251001-v1:0", + model="us.anthropic.claude-sonnet-4-6", temperature=0.8, system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", ), diff --git a/examples/foundational/15a-switch-languages.py b/examples/foundational/15a-switch-languages.py index c514d9a5d..d1bfc2289 100644 --- a/examples/foundational/15a-switch-languages.py +++ b/examples/foundational/15a-switch-languages.py @@ -7,7 +7,6 @@ import os -from deepgram import LiveOptions from dotenv import load_dotenv from loguru import logger @@ -28,7 +27,7 @@ from pipecat.processors.filters.function_filter import FunctionFilter from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings -from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService, OpenAILLMSettings from pipecat.transports.base_transport import BaseTransport, TransportParams @@ -102,7 +101,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") stt = DeepgramSTTService( - api_key=os.getenv("DEEPGRAM_API_KEY"), live_options=LiveOptions(language="multi") + api_key=os.getenv("DEEPGRAM_API_KEY"), + settings=DeepgramSTTSettings( + language="multi", + ), ) tts = SwitchLanguage() diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py index 21604829f..30ce506a7 100644 --- a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -7,7 +7,6 @@ import asyncio import os -from deepgram import LiveOptions from dotenv import load_dotenv from loguru import logger @@ -114,7 +113,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): STTUpdateSettingsFrame( delta=DeepgramSageMakerSTTSettings( language=Language.ES, - live_options=LiveOptions(punctuate=False), + punctuate=False, ) ) ) diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py index c7442549c..100887f80 100644 --- a/examples/foundational/55a-update-settings-deepgram-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -7,7 +7,6 @@ import asyncio import os -from deepgram import LiveOptions from dotenv import load_dotenv from loguru import logger @@ -108,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): STTUpdateSettingsFrame( delta=DeepgramSTTSettings( language=Language.ES, - live_options=LiveOptions(punctuate=False), + punctuate=False, ) ) ) diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py index 0ae2e0c57..06a131814 100644 --- a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -62,7 +62,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = AWSBedrockLLMService( aws_region="us-west-2", settings=AWSBedrockLLMSettings( - model="us.anthropic.claude-haiku-4-5-20251001-v1:0", + model="us.anthropic.claude-sonnet-4-6", temperature=0.8, system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", ), diff --git a/examples/foundational/55zr-update-settings-gradium-stt.py b/examples/foundational/55zr-update-settings-gradium-stt.py index 7b40638a5..9e76d3bec 100644 --- a/examples/foundational/55zr-update-settings-gradium-stt.py +++ b/examples/foundational/55zr-update-settings-gradium-stt.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Gradium STT settings: delay_in_frames=5") - await task.queue_frame(STTUpdateSettingsFrame(delta=GradiumSTTSettings(delay_in_frames=5))) + await task.queue_frame(STTUpdateSettingsFrame(delta=GradiumSTTSettings(delay_in_frames=16))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index e473403b8..625f33564 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -146,6 +146,7 @@ TESTS_07 = [ ("07zg-interruptible-camb.py", EVAL_SIMPLE_MATH), ("07zi-interruptible-piper.py", EVAL_SIMPLE_MATH), ("07zj-interruptible-kokoro.py", EVAL_SIMPLE_MATH), + ("07zk-interruptible-resembleai.py", EVAL_SIMPLE_MATH), # Needs a local XTTS docker instance running. # ("07i-interruptible-xtts.py", EVAL_SIMPLE_MATH), ] diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 56a7d0e82..7bcc1fdb1 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -74,7 +74,7 @@ def language_to_async_language(language: Language) -> Optional[str]: @dataclass class AsyncAITTSSettings(TTSSettings): - """Settings for Async AI TTS services.""" + """Settings for AsyncAITTSService and AsyncAIHttpTTSService.""" pass diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index 879fa99ab..7355bb1b7 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -107,7 +107,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): _warn_deprecated_param("language", AWSTranscribeSTTSettings, "language") default_settings.language = self.language_to_service_language(language) - # 3. No params to apply + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -158,22 +158,12 @@ class AWSTranscribeSTTService(WebsocketSTTService): return encoding_map.get(encoding, encoding) async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: - """Apply a settings delta. - - Settings are stored but not applied to the active connection. - """ + """Apply a settings delta and reconnect if anything changed.""" changed = await super()._update_settings(delta) - if not changed: - return changed - - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # if changed and self._websocket: - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) + if changed and self._websocket: + await self._disconnect() + await self._connect() return changed diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 285026bca..12eab245c 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -123,7 +123,7 @@ def language_to_aws_language(language: Language) -> Optional[str]: @dataclass class AWSPollyTTSSettings(TTSSettings): - """Settings for AWS Polly TTS service. + """Settings for AWSPollyTTSService. Parameters: engine: TTS engine to use ('standard', 'neural', etc.). diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index c1db76b82..f940ea9c0 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -112,7 +112,7 @@ class AzureSTTService(STTService): _warn_deprecated_param("language", AzureSTTSettings, "language") default_settings.language = language_to_azure_language(language) - # 3. No params to apply + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -159,23 +159,16 @@ class AzureSTTService(STTService): return language_to_azure_language(language) async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: - """Apply a settings delta. - - Settings are stored but not applied to the active recognizer. - """ + """Apply a settings delta and reconnect if language changed.""" changed = await super()._update_settings(delta) - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # if "language" in changed: - # self._speech_config.speech_recognition_language = self._settings.language - # if self._speech_recognizer: - # # Requires refactoring to set up and tear down recognizer, as - # # language is applied at recognizer initialization - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) + if "language" in changed: + self._speech_config.speech_recognition_language = ( + self._settings.language or language_to_azure_language(Language.EN_US) + ) + if self._audio_stream: + await self._disconnect() + await self._connect() return changed @@ -202,14 +195,32 @@ class AzureSTTService(STTService): async def start(self, frame: StartFrame): """Start the speech recognition service. - Initializes the Azure speech recognizer with audio stream configuration - and begins continuous speech recognition. - Args: frame: Frame indicating the start of processing. """ await super().start(frame) + await self._connect() + async def stop(self, frame: EndFrame): + """Stop the speech recognition service. + + Args: + frame: Frame indicating the end of processing. + """ + await super().stop(frame) + await self._disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the speech recognition service. + + Args: + frame: Frame indicating cancellation. + """ + await super().cancel(frame) + await self._disconnect() + + async def _connect(self): + """Initialize the Azure speech recognizer and begin continuous recognition.""" if self._audio_stream: return @@ -231,37 +242,15 @@ class AzureSTTService(STTService): error_msg=f"Uncaught exception during initialization: {e}", exception=e ) - async def stop(self, frame: EndFrame): - """Stop the speech recognition service. - - Cleanly shuts down the Azure speech recognizer and closes audio streams. - - Args: - frame: Frame indicating the end of processing. - """ - await super().stop(frame) - - if self._speech_recognizer: - self._speech_recognizer.stop_continuous_recognition_async() - - if self._audio_stream: - self._audio_stream.close() - - async def cancel(self, frame: CancelFrame): - """Cancel the speech recognition service. - - Immediately stops recognition and closes resources. - - Args: - frame: Frame indicating cancellation. - """ - await super().cancel(frame) - + async def _disconnect(self): + """Stop recognition and close audio streams.""" if self._speech_recognizer: self._speech_recognizer.stop_continuous_recognition_async() + self._speech_recognizer = None if self._audio_stream: self._audio_stream.close() + self._audio_stream = None @traced_stt async def _handle_transcription( diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index f710482e9..c2884b860 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -68,7 +68,7 @@ def sample_rate_to_output_format(sample_rate: int) -> SpeechSynthesisOutputForma @dataclass class AzureTTSSettings(TTSSettings): - """Settings for Azure TTS services. + """Settings for AzureTTSService and AzureHttpTTSService. Parameters: emphasis: Emphasis level for speech ("strong", "moderate", "reduced"). diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index b30726fda..12b33974e 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -135,7 +135,7 @@ def _get_aligned_audio(buffer: bytes) -> tuple[bytes, bytes]: @dataclass class CambTTSSettings(TTSSettings): - """Settings for Camb.ai TTS service. + """Settings for CambTTSService. Parameters: user_instructions: Custom instructions for mars-instruct model only. diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 3f708d06f..c71a84f41 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -23,7 +23,6 @@ from pipecat.frames.frames import ( Frame, StartFrame, TTSAudioRawFrame, - TTSStartedFrame, TTSStoppedFrame, ) from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, _warn_deprecated_param @@ -188,7 +187,7 @@ class CartesiaEmotion(str, Enum): @dataclass class CartesiaTTSSettings(TTSSettings): - """Settings for Cartesia TTS services. + """Settings for CartesiaTTSService and CartesiaHttpTTSService. Parameters: generation_config: Generation configuration for Sonic-3 models. Includes volume, @@ -705,7 +704,7 @@ class CartesiaHttpTTSService(TTSService): voice_id: Optional[str] = None, model: Optional[str] = None, base_url: str = "https://api.cartesia.ai", - cartesia_version: str = "2024-11-13", + cartesia_version: str = "2026-03-01", aiohttp_session: Optional[aiohttp.ClientSession] = None, sample_rate: Optional[int] = None, encoding: str = "pcm_s16le", diff --git a/src/pipecat/services/deepgram/sagemaker/tts.py b/src/pipecat/services/deepgram/sagemaker/tts.py index 9e8c30ad7..fa5b1cebd 100644 --- a/src/pipecat/services/deepgram/sagemaker/tts.py +++ b/src/pipecat/services/deepgram/sagemaker/tts.py @@ -40,7 +40,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts @dataclass class DeepgramSageMakerTTSSettings(TTSSettings): - """Settings for Deepgram SageMaker TTS service.""" + """Settings for DeepgramSageMakerTTSService.""" pass diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index caa6233b3..14020631b 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -365,7 +365,9 @@ class DeepgramSTTService(STTService): vad_events=False, ) - # 2. Apply live_options overrides — only if settings not provided + # 2. (No step 2, as there are no deprecated direct args) + + # 3. Apply live_options overrides — only if settings not provided if live_options is not None: _warn_deprecated_param("live_options", DeepgramSTTSettings) if not settings: @@ -402,7 +404,7 @@ class DeepgramSTTService(STTService): delta = DeepgramSTTSettings.from_mapping(lo_dict) default_settings.apply_update(delta) - # 3. Apply settings delta (canonical API, always wins) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) @@ -494,8 +496,9 @@ class DeepgramSTTService(STTService): if isinstance(self._settings, DeepgramSTTSettings): self._settings._sync_extra_to_fields() - await self._disconnect() - await self._connect() + if self._connection: + await self._disconnect() + await self._connect() return changed @@ -594,13 +597,16 @@ class DeepgramSTTService(STTService): return logger.debug("Disconnecting from Deepgram") - # Ask Deepgram to close the stream gracefully before cancelling the task. - if self._connection: - await self._connection.send_close_stream() + # Clear self._connection first to prevent run_stt from sending audio + # during the close handshake, then close gracefully on the saved ref. + connection = self._connection + self._connection = None + + if connection: + await connection.send_close_stream() await self.cancel_task(self._connection_task) self._connection_task = None - self._connection = None async def _connection_handler(self): """Manages the full WebSocket lifecycle inside a single async with block. diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 6c8685bee..5d6e5ffdc 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -45,7 +45,7 @@ except ModuleNotFoundError as e: @dataclass class DeepgramTTSSettings(TTSSettings): - """Settings for Deepgram TTS service.""" + """Settings for DeepgramTTSService and DeepgramHttpTTSService.""" pass @@ -110,6 +110,8 @@ class DeepgramTTSService(WebsocketTTSService): default_settings.model = voice default_settings.voice = voice + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) @@ -423,6 +425,8 @@ class DeepgramHttpTTSService(TTSService): default_settings.model = voice default_settings.voice = voice + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 230e8a368..dff501d22 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -200,18 +200,12 @@ class ElevenLabsRealtimeSTTSettings(STTSettings): vad_threshold: VAD sensitivity (0.1-0.9, lower is more sensitive). min_speech_duration_ms: Minimum speech duration for VAD (50-2000ms). min_silence_duration_ms: Minimum silence duration for VAD (50-2000ms). - include_timestamps: Whether to include word-level timestamps in transcripts. - enable_logging: Whether to enable logging on ElevenLabs' side. - include_language_detection: Whether to include language detection in transcripts. """ vad_silence_threshold_secs: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) vad_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) min_speech_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) min_silence_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - include_timestamps: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - enable_logging: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - include_language_detection: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class ElevenLabsSTTService(SegmentedSTTService): @@ -496,6 +490,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): commit_strategy: CommitStrategy = CommitStrategy.MANUAL, model: Optional[str] = None, sample_rate: Optional[int] = None, + include_timestamps: bool = False, + enable_logging: bool = False, + include_language_detection: bool = False, params: Optional[InputParams] = None, settings: Optional[ElevenLabsRealtimeSTTSettings] = None, ttfs_p99_latency: Optional[float] = ELEVENLABS_REALTIME_TTFS_P99, @@ -515,6 +512,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Use ``settings=ElevenLabsRealtimeSTTSettings(model=...)`` instead. sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate. + include_timestamps: Whether to include word-level timestamps in transcripts. + enable_logging: Whether to enable logging on ElevenLabs' side. + include_language_detection: Whether to include language detection in transcripts. params: Configuration parameters for the STT service. .. deprecated:: 0.0.105 @@ -534,9 +534,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): vad_threshold=None, min_speech_duration_ms=None, min_silence_duration_ms=None, - include_timestamps=False, - enable_logging=False, - include_language_detection=False, ) # 2. Apply direct init arg overrides (deprecated) @@ -555,9 +552,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): default_settings.vad_threshold = params.vad_threshold default_settings.min_speech_duration_ms = params.min_speech_duration_ms default_settings.min_silence_duration_ms = params.min_silence_duration_ms - default_settings.include_timestamps = params.include_timestamps - default_settings.enable_logging = params.enable_logging - default_settings.include_language_detection = params.include_language_detection + include_timestamps = params.include_timestamps + enable_logging = params.enable_logging + include_language_detection = params.include_language_detection # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -579,6 +576,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): # Init-only config (not runtime-updatable). self._commit_strategy = commit_strategy + self._include_timestamps = include_timestamps + self._enable_logging = enable_logging + self._include_language_detection = include_language_detection self._connected_event = asyncio.Event() self._connected_event.set() @@ -605,8 +605,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): if not changed: return changed - await self._disconnect() - await self._connect() + if self._websocket: + await self._disconnect() + await self._connect() return changed @@ -762,17 +763,15 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): params.append(f"commit_strategy={self._commit_strategy.value}") # Add optional parameters - if self._settings.include_timestamps: - params.append( - f"include_timestamps={str(self._settings.include_timestamps).lower()}" - ) + if self._include_timestamps: + params.append(f"include_timestamps={str(self._include_timestamps).lower()}") - if self._settings.enable_logging: - params.append(f"enable_logging={str(self._settings.enable_logging).lower()}") + if self._enable_logging: + params.append(f"enable_logging={str(self._enable_logging).lower()}") - if self._settings.include_language_detection: + if self._include_language_detection: params.append( - f"include_language_detection={str(self._settings.include_language_detection).lower()}" + f"include_language_detection={str(self._include_language_detection).lower()}" ) # Add VAD parameters if using VAD commit strategy and values are specified @@ -920,7 +919,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ # If timestamps are enabled, skip this message and wait for the # committed_transcript_with_timestamps message which contains all the data - if self._settings.include_timestamps: + if self._include_timestamps: return text = data.get("text", "").strip() diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index de930d1f2..ae413fd3b 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -187,7 +187,7 @@ class PronunciationDictionaryLocator(BaseModel): @dataclass class ElevenLabsTTSSettings(TTSSettings): - """Settings for the ElevenLabs WebSocket TTS service. + """Settings for ElevenLabsTTSService. Fields that appear in the WebSocket URL (``voice``, ``model``, ``language``) require a full reconnect when changed. Fields that @@ -225,7 +225,7 @@ class ElevenLabsTTSSettings(TTSSettings): @dataclass class ElevenLabsHttpTTSSettings(TTSSettings): - """Settings for the ElevenLabs HTTP TTS service. + """Settings for ElevenLabsHttpTTSService. Parameters: optimize_streaming_latency: Latency optimization level (0-4). @@ -358,6 +358,9 @@ class ElevenLabsTTSService(WebsocketTTSService): model: Optional[str] = None, url: str = "wss://api.elevenlabs.io", sample_rate: Optional[int] = None, + auto_mode: bool = True, + enable_ssml_parsing: Optional[bool] = None, + enable_logging: Optional[bool] = None, pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None, params: Optional[InputParams] = None, settings: Optional[ElevenLabsTTSSettings] = None, @@ -381,6 +384,9 @@ class ElevenLabsTTSService(WebsocketTTSService): url: WebSocket URL for ElevenLabs TTS API. sample_rate: Audio sample rate. If None, uses default. + auto_mode: Whether to enable automatic mode optimization. + enable_ssml_parsing: Whether to parse SSML tags in text. + enable_logging: Whether to enable ElevenLabs server-side logging. pronunciation_dictionary_locators: List of pronunciation dictionary locators to use. params: Additional input parameters for voice customization. @@ -428,11 +434,6 @@ class ElevenLabsTTSService(WebsocketTTSService): apply_text_normalization=None, ) - # Track init-only URL params through the override chain - _auto_mode = True - _enable_ssml_parsing = None - _enable_logging = None - # 2. Apply direct init arg overrides (deprecated) if voice_id is not None: _warn_deprecated_param("voice_id", ElevenLabsTTSSettings, "voice") @@ -459,11 +460,11 @@ class ElevenLabsTTSService(WebsocketTTSService): if params.speed is not None: default_settings.speed = params.speed if params.auto_mode is not None: - _auto_mode = str(params.auto_mode).lower() + auto_mode = params.auto_mode if params.enable_ssml_parsing is not None: - _enable_ssml_parsing = params.enable_ssml_parsing + enable_ssml_parsing = params.enable_ssml_parsing if params.enable_logging is not None: - _enable_logging = params.enable_logging + enable_logging = params.enable_logging if params.apply_text_normalization is not None: default_settings.apply_text_normalization = params.apply_text_normalization if _pronunciation_dictionary_locators is None: @@ -488,9 +489,9 @@ class ElevenLabsTTSService(WebsocketTTSService): self._url = url # Init-only WebSocket URL params (not runtime-updatable). - self._auto_mode = _auto_mode - self._enable_ssml_parsing = _enable_ssml_parsing - self._enable_logging = _enable_logging + self._auto_mode = auto_mode + self._enable_ssml_parsing = enable_ssml_parsing + self._enable_logging = enable_logging self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() @@ -664,7 +665,7 @@ class ElevenLabsTTSService(WebsocketTTSService): voice_id = self._settings.voice model = self._settings.model output_format = self._output_format - url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._auto_mode}" + url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={str(self._auto_mode).lower()}" if self._enable_ssml_parsing: url += f"&enable_ssml_parsing={self._enable_ssml_parsing}" diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 64c3bccd9..88e0cc8dd 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -10,9 +10,8 @@ This module provides integration with Fish Audio's real-time TTS WebSocket API for streaming text-to-speech synthesis with customizable voice parameters. """ -import uuid from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, ClassVar, Dict, Literal, Mapping, Optional, Self +from typing import Any, AsyncGenerator, Literal, Mapping, Optional, Self from loguru import logger from pydantic import BaseModel @@ -25,7 +24,6 @@ from pipecat.frames.frames import ( InterruptionFrame, StartFrame, TTSAudioRawFrame, - TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection @@ -49,21 +47,23 @@ FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"] @dataclass class FishAudioTTSSettings(TTSSettings): - """Settings for Fish Audio TTS service. + """Settings for FishAudioTTSService. Parameters: - latency: Latency mode ("normal" or "balanced"). Defaults to "normal". + latency: Latency mode ("normal" or "balanced"). Defaults to "balanced". normalize: Whether to normalize audio output. Defaults to True. + temperature: Controls randomness in speech generation (0.0-1.0). + top_p: Controls diversity via nucleus sampling (0.0-1.0). prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0. - prosody_volume: Volume adjustment in dB. Defaults to 0. - reference_id: Reference ID of the voice model. + prosody_volume: Volume adjustment in dB (-20 to 20). Defaults to 0. """ latency: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) normalize: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) prosody_speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) prosody_volume: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @classmethod def from_mapping(cls, settings: Mapping[str, Any]) -> Self: @@ -174,18 +174,18 @@ class FishAudioTTSService(InterruptibleTTSService): model="s1", voice=None, language=None, - latency="normal", + latency="balanced", normalize=True, + temperature=None, + top_p=None, prosody_speed=1.0, prosody_volume=0, - reference_id=None, ) # 2. Apply direct init arg overrides (deprecated) if reference_id is not None: _warn_deprecated_param("reference_id", FishAudioTTSSettings, "voice") default_settings.voice = reference_id - default_settings.reference_id = reference_id if model_id is not None: _warn_deprecated_param("model_id", FishAudioTTSSettings, "model") default_settings.model = model_id @@ -317,8 +317,12 @@ class FishAudioTTSService(InterruptibleTTSService): "speed": self._settings.prosody_speed, "volume": self._settings.prosody_volume, }, - "reference_id": self._settings.reference_id, + "reference_id": self._settings.voice, } + if self._settings.temperature is not None: + request_settings["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + request_settings["top_p"] = self._settings.top_p start_message = {"event": "start", "request": {"text": "", **request_settings}} await self._websocket.send(ormsgpack.packb(start_message)) logger.debug("Sent start message to Fish Audio") @@ -375,7 +379,14 @@ class FishAudioTTSService(InterruptibleTTSService): frame = TTSAudioRawFrame(audio_data, self.sample_rate, 1) await self.push_frame(frame) await self.stop_ttfb_metrics() - continue + elif event == "finish": + reason = msg.get("reason", "unknown") + if reason == "error": + await self.push_error( + error_msg="Fish Audio server error during synthesis" + ) + else: + logger.debug(f"Fish Audio session finished: {reason}") except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 2a912c355..814d478e4 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -12,7 +12,7 @@ WebSocket API for streaming audio transcription. import base64 import json -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import STTSettings, _warn_deprecated_param +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, _warn_deprecated_param from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -68,9 +68,16 @@ def language_to_gradium_language(language: Language) -> Optional[str]: @dataclass class GradiumSTTSettings(STTSettings): - """Settings for GradiumSTTService.""" + """Settings for GradiumSTTService. - pass + Parameters: + delay_in_frames: Delay in audio frames (80ms each) before text is + generated. Higher delays allow more context but increase latency. + Allowed values: 7, 8, 10, 12, 14, 16, 20, 24, 36, 48. + Default is 10 (800ms). Lower values like 7-8 give faster response. + """ + + delay_in_frames: Optional[int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GradiumSTTService(WebsocketSTTService): @@ -107,7 +114,6 @@ class GradiumSTTService(WebsocketSTTService): *, api_key: str, api_endpoint_base_url: str = "wss://eu.api.gradium.ai/api/speech/asr", - delay_in_frames: Optional[int] = None, params: Optional[InputParams] = None, json_config: Optional[str] = None, settings: Optional[GradiumSTTSettings] = None, @@ -119,9 +125,6 @@ class GradiumSTTService(WebsocketSTTService): Args: api_key: Gradium API key for authentication. api_endpoint_base_url: WebSocket endpoint URL. Defaults to Gradium's streaming endpoint. - delay_in_frames: Delay in audio frames (80ms each) before text is - generated. Higher delays allow more context but increase latency. - Allowed values: 7, 8, 10, 12, 14, 16, 20, 24, 36, 48. params: Configuration parameters for language and delay settings. .. deprecated:: 0.0.105 @@ -151,9 +154,10 @@ class GradiumSTTService(WebsocketSTTService): default_settings = GradiumSTTSettings( model=None, language=None, + delay_in_frames=None, ) - # 2. (no deprecated direct args for this service) + # 2. (No step 2, as there are no deprecated direct args) # 3. Apply params overrides — only if settings not provided if params is not None: @@ -161,7 +165,7 @@ class GradiumSTTService(WebsocketSTTService): if not settings: default_settings.language = params.language if params.delay_in_frames is not None: - delay_in_frames = params.delay_in_frames + default_settings.delay_in_frames = params.delay_in_frames # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -178,7 +182,6 @@ class GradiumSTTService(WebsocketSTTService): self._api_endpoint_base_url = api_endpoint_base_url self._websocket = None self._json_config = json_config - self._config_delay_in_frames = delay_in_frames self._receive_task = None @@ -212,8 +215,9 @@ class GradiumSTTService(WebsocketSTTService): if not changed: return changed - await self._disconnect() - await self._connect() + if self._websocket: + await self._disconnect() + await self._connect() return changed async def start(self, frame: StartFrame): @@ -358,8 +362,8 @@ class GradiumSTTService(WebsocketSTTService): gradium_language = language_to_gradium_language(self._settings.language) if gradium_language: json_config["language"] = gradium_language - if self._config_delay_in_frames: - json_config["delay_in_frames"] = self._config_delay_in_frames + if self._settings.delay_in_frames: + json_config["delay_in_frames"] = self._settings.delay_in_frames if json_config: setup_msg["json_config"] = json_config await self._websocket.send(json.dumps(setup_msg)) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 745a77f56..3dd663185 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -39,7 +39,7 @@ SAMPLE_RATE = 48000 @dataclass class GradiumTTSSettings(TTSSettings): - """Settings for the Gradium TTS service.""" + """Settings for GradiumTTSService.""" pass diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 139816834..4e56312e5 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -34,7 +34,7 @@ except ModuleNotFoundError as e: @dataclass class GroqTTSSettings(TTSSettings): - """Settings for the Groq TTS service. + """Settings for GroqTTSService. Parameters: speed: Speech speed multiplier. Defaults to 1.0. diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index ff5eb7522..135806da7 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -50,7 +50,7 @@ DEFAULT_HEADERS = { @dataclass class HumeTTSSettings(TTSSettings): - """Settings for Hume TTS service. + """Settings for HumeTTSService. Parameters: description: Natural-language acting directions (up to 100 characters). diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index d602efb52..17a0e8040 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -68,32 +68,19 @@ from pipecat.utils.tracing.service_decorators import traced_tts @dataclass class InworldTTSSettings(TTSSettings): - """Settings for Inworld TTS services. + """Settings for InworldTTSService and InworldHttpTTSService. Parameters: speaking_rate: Speaking rate for speech synthesis. temperature: Temperature for speech synthesis. - auto_mode: Whether to use auto mode. Recommended when texts are sent - in full sentences/phrases. When enabled, the server controls - flushing of buffered text to achieve minimal latency while - maintaining high quality audio output. If None (default), - automatically set based on aggregate_sentences. - apply_text_normalization: Whether to apply text normalization. - timestamp_transport_strategy: Strategy for timestamp transport ("ASYNC" or "SYNC"). """ speaking_rate: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - auto_mode: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - apply_text_normalization: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - timestamp_transport_strategy: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = { "voiceId": "voice", "modelId": "model", - "applyTextNormalization": "apply_text_normalization", - "autoMode": "auto_mode", - "timestampTransportStrategy": "timestamp_transport_strategy", } @classmethod @@ -141,6 +128,7 @@ class InworldHttpTTSService(TTSService): streaming: bool = True, sample_rate: Optional[int] = None, encoding: str = "LINEAR16", + timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC", params: Optional[InputParams] = None, settings: Optional[InworldTTSSettings] = None, **kwargs, @@ -163,6 +151,8 @@ class InworldHttpTTSService(TTSService): streaming: Whether to use streaming mode. sample_rate: Audio sample rate in Hz. encoding: Audio encoding format. + timestamp_transport_strategy: Strategy for timestamp transport + ("ASYNC" or "SYNC"). Defaults to "ASYNC". params: Input parameters for Inworld TTS configuration. .. deprecated:: 0.0.105 @@ -179,9 +169,6 @@ class InworldHttpTTSService(TTSService): language=None, speaking_rate=None, temperature=None, - timestamp_transport_strategy="ASYNC", - auto_mode=None, # Not applicable for HTTP TTS - apply_text_normalization=None, # Not applicable for HTTP TTS ) # 2. Apply direct init arg overrides (deprecated) @@ -201,9 +188,7 @@ class InworldHttpTTSService(TTSService): if params.temperature is not None: default_settings.temperature = params.temperature if params.timestamp_transport_strategy is not None: - default_settings.timestamp_transport_strategy = ( - params.timestamp_transport_strategy - ) + timestamp_transport_strategy = params.timestamp_transport_strategy # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -230,9 +215,10 @@ class InworldHttpTTSService(TTSService): self._cumulative_time = 0.0 - # Init-only audio format config (not runtime-updatable). + # Init-only config (not runtime-updatable). self._audio_encoding = encoding self._audio_sample_rate = 0 # Set in start() + self._timestamp_transport_strategy = timestamp_transport_strategy def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -251,22 +237,6 @@ class InworldHttpTTSService(TTSService): await super().start(frame) self._audio_sample_rate = self.sample_rate - async def stop(self, frame: EndFrame): - """Stop the Inworld TTS service. - - Args: - frame: The end frame. - """ - await super().stop(frame) - - async def cancel(self, frame: CancelFrame): - """Cancel the Inworld TTS service. - - Args: - frame: The cancel frame. - """ - await super().cancel(frame) - async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM): """Push a frame and handle state changes. @@ -347,8 +317,8 @@ class InworldHttpTTSService(TTSService): # Use WORD timestamps for simplicity and correct spacing/capitalization payload["timestampType"] = self._timestamp_type - if self._settings.timestamp_transport_strategy is not None: - payload["timestampTransportStrategy"] = self._settings.timestamp_transport_strategy + if self._timestamp_transport_strategy is not None: + payload["timestampTransportStrategy"] = self._timestamp_transport_strategy request_id = str(uuid.uuid4()) headers = { @@ -556,6 +526,9 @@ class InworldTTSService(WebsocketTTSService): url: str = "wss://api.inworld.ai/tts/v1/voice:streamBidirectional", sample_rate: Optional[int] = None, encoding: str = "LINEAR16", + auto_mode: Optional[bool] = None, + apply_text_normalization: Optional[str] = None, + timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC", params: Optional[InputParams] = None, settings: Optional[InworldTTSSettings] = None, aggregate_sentences: Optional[bool] = None, @@ -580,6 +553,12 @@ class InworldTTSService(WebsocketTTSService): url: URL of the Inworld WebSocket API. sample_rate: Audio sample rate in Hz. encoding: Audio encoding format. + auto_mode: Whether to use auto mode. When enabled, the server + controls flushing of buffered text. If None (default), + automatically set based on ``aggregate_sentences``. + apply_text_normalization: Whether to apply text normalization. + timestamp_transport_strategy: Strategy for timestamp transport + ("ASYNC" or "SYNC"). Defaults to "ASYNC". params: Input parameters for Inworld WebSocket TTS configuration. .. deprecated:: 0.0.105 @@ -596,6 +575,10 @@ class InworldTTSService(WebsocketTTSService): append_trailing_space: Whether to append a trailing space to text before sending to TTS. **kwargs: Additional arguments passed to the parent class. """ + # Derive auto_mode from aggregate_sentences if not explicitly set + if auto_mode is None: + auto_mode = True if aggregate_sentences is None else aggregate_sentences + # 1. Initialize default_settings with hardcoded defaults default_settings = InworldTTSSettings( model="inworld-tts-1.5-max", @@ -603,9 +586,6 @@ class InworldTTSService(WebsocketTTSService): language=None, speaking_rate=None, temperature=None, - apply_text_normalization=None, - timestamp_transport_strategy="ASYNC", - auto_mode=True if aggregate_sentences is None else aggregate_sentences, ) # 2. Apply direct init arg overrides (deprecated) @@ -627,13 +607,11 @@ class InworldTTSService(WebsocketTTSService): if params.temperature is not None: default_settings.temperature = params.temperature if params.apply_text_normalization is not None: - default_settings.apply_text_normalization = params.apply_text_normalization + apply_text_normalization = params.apply_text_normalization if params.timestamp_transport_strategy is not None: - default_settings.timestamp_transport_strategy = ( - params.timestamp_transport_strategy - ) + timestamp_transport_strategy = params.timestamp_transport_strategy if params.auto_mode is not None: - default_settings.auto_mode = params.auto_mode + auto_mode = params.auto_mode _buffer_max_delay_ms = params.max_buffer_delay_ms _buffer_char_threshold = params.buffer_char_threshold @@ -673,9 +651,12 @@ class InworldTTSService(WebsocketTTSService): # Track the end time of the last word in the current generation self._generation_end_time = 0.0 - # Init-only audio format config (not runtime-updatable). + # Init-only config (not runtime-updatable). self._audio_encoding = encoding self._audio_sample_rate = 0 # Set in start() + self._auto_mode = auto_mode + self._apply_text_normalization = apply_text_normalization + self._timestamp_transport_strategy = timestamp_transport_strategy def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -926,7 +907,7 @@ class InworldTTSService(WebsocketTTSService): for k in ["contextCreated", "audioChunk", "flushCompleted", "contextClosed"] if k in result ] - logger.debug(f"{self}: Received message types={msg_types}, ctx_id={ctx_id}") + logger.trace(f"{self}: Received message types={msg_types}, ctx_id={ctx_id}") # Check for errors status = result.get("status", {}) @@ -1036,14 +1017,12 @@ class InworldTTSService(WebsocketTTSService): if self._settings.temperature is not None: create_config["temperature"] = self._settings.temperature - if self._settings.apply_text_normalization is not None: - create_config["applyTextNormalization"] = self._settings.apply_text_normalization - if self._settings.auto_mode is not None: - create_config["autoMode"] = self._settings.auto_mode - if self._settings.timestamp_transport_strategy is not None: - create_config["timestampTransportStrategy"] = ( - self._settings.timestamp_transport_strategy - ) + if self._apply_text_normalization is not None: + create_config["applyTextNormalization"] = self._apply_text_normalization + if self._auto_mode is not None: + create_config["autoMode"] = self._auto_mode + if self._timestamp_transport_strategy is not None: + create_config["timestampTransportStrategy"] = self._timestamp_transport_strategy # Set buffer settings for timely audio generation. # Use provided values or defaults that work well for streaming LLM output. diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index e69ef7a67..bfc39daed 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -89,7 +89,7 @@ def language_to_kokoro_language(language: Language) -> str: @dataclass class KokoroTTSSettings(TTSSettings): - """Settings for the Kokoro TTS service.""" + """Settings for KokoroTTSService.""" pass diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index c8bfcaf55..f6bf46649 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -19,7 +19,6 @@ from pipecat.frames.frames import ( Frame, StartFrame, TTSAudioRawFrame, - TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection @@ -48,6 +47,7 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: The corresponding LMNT language code, or None if not supported. """ LANGUAGE_MAP = { + Language.AR: "ar", Language.DE: "de", Language.EN: "en", Language.ES: "es", @@ -65,6 +65,7 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: Language.TH: "th", Language.TR: "tr", Language.UK: "uk", + Language.UR: "ur", Language.VI: "vi", Language.ZH: "zh", } @@ -74,7 +75,7 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: @dataclass class LmntTTSSettings(TTSSettings): - """Settings for LMNT TTS service.""" + """Settings for LmntTTSService.""" pass @@ -96,6 +97,7 @@ class LmntTTSService(InterruptibleTTSService): voice_id: Optional[str] = None, sample_rate: Optional[int] = None, language: Language = Language.EN, + output_format: str = "pcm_s16le", model: Optional[str] = None, settings: Optional[LmntTTSSettings] = None, **kwargs, @@ -111,6 +113,8 @@ class LmntTTSService(InterruptibleTTSService): sample_rate: Audio sample rate. If None, uses default. language: Language for synthesis. Defaults to English. + output_format: Audio output format. One of "pcm_s16le", "pcm_f32le", + "mp3", "ulaw", "webm". Defaults to "pcm_s16le". model: TTS model to use. .. deprecated:: 0.0.105 @@ -122,7 +126,7 @@ class LmntTTSService(InterruptibleTTSService): """ # 1. Initialize default_settings with hardcoded defaults default_settings = LmntTTSSettings( - model="blizzard", + model="aurora", voice=None, language=self.language_to_service_language(language), ) @@ -135,7 +139,7 @@ class LmntTTSService(InterruptibleTTSService): _warn_deprecated_param("model", LmntTTSSettings, "model") default_settings.model = model - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -151,7 +155,7 @@ class LmntTTSService(InterruptibleTTSService): ) self._api_key = api_key - self._output_format = "raw" + self._output_format = output_format self._receive_task = None def can_generate_metrics(self) -> bool: diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 33e0669e1..1edd8bf78 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -87,10 +87,9 @@ def language_to_minimax_language(language: Language) -> Optional[str]: @dataclass class MiniMaxTTSSettings(TTSSettings): - """Settings for MiniMax TTS service. + """Settings for MiniMaxHttpTTSService. Parameters: - stream: Whether to use streaming mode. speed: Speech speed (range: 0.5 to 2.0). volume: Speech volume (range: 0 to 10). pitch: Pitch adjustment (range: -12 to 12). @@ -101,7 +100,6 @@ class MiniMaxTTSSettings(TTSSettings): language_boost: Language boost string for multilingual support. """ - stream: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) volume: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) pitch: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -189,6 +187,7 @@ class MiniMaxHttpTTSService(TTSService): voice_id: Optional[str] = None, aiohttp_session: aiohttp.ClientSession, sample_rate: Optional[int] = None, + stream: bool = True, params: Optional[InputParams] = None, settings: Optional[MiniMaxTTSSettings] = None, **kwargs, @@ -217,6 +216,7 @@ class MiniMaxHttpTTSService(TTSService): aiohttp_session: aiohttp.ClientSession for API communication. sample_rate: Output audio sample rate in Hz. If None, uses pipeline default. + stream: Whether to use streaming mode. Defaults to True. params: Additional configuration parameters. .. deprecated:: 0.0.105 @@ -231,7 +231,6 @@ class MiniMaxHttpTTSService(TTSService): model="speech-02-turbo", voice="Calm_Woman", language=None, - stream=True, speed=1.0, volume=1.0, pitch=0, @@ -311,6 +310,7 @@ class MiniMaxHttpTTSService(TTSService): self._api_key = api_key self._group_id = group_id + self._stream = stream self._base_url = f"{base_url}?GroupId={group_id}" self._session = aiohttp_session @@ -392,7 +392,7 @@ class MiniMaxHttpTTSService(TTSService): # Create payload from settings payload = { - "stream": self._settings.stream, + "stream": self._stream, "voice_setting": voice_setting, "audio_setting": audio_setting, "model": self._settings.model, diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index c1916414e..441d222ea 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -26,12 +26,10 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, LLMFullResponseEndFrame, StartFrame, TTSAudioRawFrame, TTSSpeakFrame, - TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection @@ -76,7 +74,7 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]: @dataclass class NeuphonicTTSSettings(TTSSettings): - """Settings for Neuphonic TTS service. + """Settings for NeuphonicTTSService and NeuphonicHttpTTSService. Parameters: speed: Speech speed multiplier. Defaults to 1.0. @@ -487,7 +485,7 @@ class NeuphonicHttpTTSService(TTSService): default_settings = NeuphonicTTSSettings( model=None, voice=None, - language=self.language_to_service_language(Language.EN) or "en", + language=self.language_to_service_language(Language.EN), speed=1.0, ) @@ -501,9 +499,7 @@ class NeuphonicHttpTTSService(TTSService): _warn_deprecated_param("params", NeuphonicTTSSettings) if not settings: if params.language is not None: - default_settings.language = ( - self.language_to_service_language(params.language) or "en" - ) + default_settings.language = self.language_to_service_language(params.language) if params.speed is not None: default_settings.speed = params.speed diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 7f7638f5f..fb701185c 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -44,7 +44,7 @@ except ModuleNotFoundError as e: @dataclass class NvidiaTTSSettings(TTSSettings): - """Settings for NVIDIA Riva TTS service. + """Settings for NvidiaTTSService. Parameters: quality: Audio quality setting (0-100). diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index a50129349..71c04c59f 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -62,7 +62,7 @@ VALID_VOICES: Dict[str, ValidVoice] = { @dataclass class OpenAITTSSettings(TTSSettings): - """Settings for OpenAI TTS service. + """Settings for OpenAITTSService. Parameters: instructions: Instructions to guide voice synthesis behavior. diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index f0343947b..fb7b627cd 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -33,7 +33,7 @@ except ModuleNotFoundError as e: @dataclass class PiperTTSSettings(TTSSettings): - """Settings for Piper TTS service.""" + """Settings for PiperTTSService.""" pass @@ -82,7 +82,7 @@ class PiperTTSService(TTSService): _warn_deprecated_param("voice_id", PiperTTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: @@ -186,7 +186,7 @@ class PiperTTSService(TTSService): # @dataclass class PiperHttpTTSSettings(TTSSettings): - """Settings for Piper HTTP TTS service.""" + """Settings for PiperHttpTTSService.""" pass @@ -232,7 +232,7 @@ class PiperHttpTTSService(TTSService): _warn_deprecated_param("voice_id", PiperHttpTTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 45f8fc229..9713cea44 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -38,7 +38,7 @@ except ModuleNotFoundError as e: @dataclass class ResembleAITTSSettings(TTSSettings): - """Settings for Resemble AI TTS service.""" + """Settings for ResembleAITTSService.""" pass @@ -94,7 +94,7 @@ class ResembleAITTSService(WebsocketTTSService): _warn_deprecated_param("voice_id", ResembleAITTSSettings, "voice") default_settings.voice = voice_id - # 3. No params for this service + # 3. (No step 3, as there's no params object to apply) # 4. Apply settings delta (canonical API, always wins) if settings is not None: diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 27580504b..8a9186c59 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -73,7 +73,7 @@ def language_to_rime_language(language: Language) -> str: @dataclass class RimeTTSSettings(TTSSettings): - """Settings for Rime WS JSON and HTTP TTS services. + """Settings for RimeTTSService and RimeHttpTTSService. Parameters: segment: Text segmentation mode ("immediate", "bySentence", "never"). @@ -106,7 +106,7 @@ class RimeTTSSettings(TTSSettings): @dataclass class RimeNonJsonTTSSettings(TTSSettings): - """Settings for Rime non-JSON WS TTS service. + """Settings for RimeNonJsonTTSService. Parameters: segment: Text segmentation mode ("immediate", "bySentence", "never"). diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 3e4136c41..cd6d19ac0 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -400,12 +400,13 @@ class SarvamSTTService(STTService): changed = await super()._update_settings(delta) - # Prompt is a WebSocket connect-time parameter; reconnect to apply. - if "prompt" in changed: + # Language and prompt are WebSocket connect-time parameters; reconnect to apply. + reconnect_fields = {"language", "prompt"} + if changed.keys() & reconnect_fields: await self._disconnect() await self._connect() - unhandled = {k: v for k, v in changed.items() if k != "prompt"} + unhandled = {k: v for k, v in changed.items() if k not in reconnect_fields} if unhandled: self._warn_unhandled_updated_settings(unhandled) @@ -483,7 +484,6 @@ class SarvamSTTService(STTService): Frame: None (transcription results come via WebSocket callbacks). """ if not self._socket_client: - logger.warning("WebSocket not connected, cannot process audio") yield None return @@ -636,18 +636,22 @@ class SarvamSTTService(STTService): await self.cancel_task(self._receive_task) self._receive_task = None - if self._websocket_context and self._socket_client: + # Clear references first to prevent run_stt from sending audio + # during the close handshake. + socket_client = self._socket_client + websocket_context = self._websocket_context + self._socket_client = None + self._websocket_context = None + + if websocket_context and socket_client: try: - # Exit the async context manager - await self._websocket_context.__aexit__(None, None, None) + await websocket_context.__aexit__(None, None, None) except Exception as e: await self.push_error( error_msg=f"Error closing WebSocket connection: {e}", exception=e ) finally: logger.debug("Disconnected from Sarvam WebSocket") - self._socket_client = None - self._websocket_context = None async def _receive_task_handler(self): """Handle incoming messages from Sarvam WebSocket. diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 6bf38bb24..c0a74b198 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -53,11 +53,9 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, LLMFullResponseEndFrame, StartFrame, TTSAudioRawFrame, - TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection @@ -230,16 +228,27 @@ def language_to_sarvam_language(language: Language) -> Optional[str]: """ LANGUAGE_MAP = { Language.BN: "bn-IN", # Bengali + Language.BN_IN: "bn-IN", Language.EN: "en-IN", # English (India) + Language.EN_IN: "en-IN", Language.GU: "gu-IN", # Gujarati + Language.GU_IN: "gu-IN", Language.HI: "hi-IN", # Hindi + Language.HI_IN: "hi-IN", Language.KN: "kn-IN", # Kannada + Language.KN_IN: "kn-IN", Language.ML: "ml-IN", # Malayalam + Language.ML_IN: "ml-IN", Language.MR: "mr-IN", # Marathi + Language.MR_IN: "mr-IN", Language.OR: "od-IN", # Odia + Language.OR_IN: "od-IN", Language.PA: "pa-IN", # Punjabi + Language.PA_IN: "pa-IN", Language.TA: "ta-IN", # Tamil + Language.TA_IN: "ta-IN", Language.TE: "te-IN", # Telugu + Language.TE_IN: "te-IN", } return resolve_language(language, LANGUAGE_MAP, use_base_code=False) @@ -247,7 +256,7 @@ def language_to_sarvam_language(language: Language) -> Optional[str]: @dataclass class SarvamHttpTTSSettings(TTSSettings): - """Settings for Sarvam HTTP TTS service. + """Settings for SarvamHttpTTSService. Parameters: enable_preprocessing: Whether to enable text preprocessing. Defaults to False. @@ -273,7 +282,7 @@ class SarvamHttpTTSSettings(TTSSettings): @dataclass class SarvamTTSSettings(SarvamHttpTTSSettings): - """Settings for Sarvam WebSocket TTS service. + """Settings for SarvamTTSService. Extends :class:`SarvamHttpTTSSettings` with WebSocket-specific buffering parameters. @@ -481,6 +490,10 @@ class SarvamHttpTTSService(TTSService): if settings is not None: default_settings.apply_update(settings) + # Convert Language enum to service-specific string + if isinstance(default_settings.language, Language): + default_settings.language = self.language_to_service_language(default_settings.language) + # Get model configuration (validates model exists) resolved_model = default_settings.model if resolved_model not in TTS_MODEL_CONFIGS: @@ -900,6 +913,10 @@ class SarvamTTSService(InterruptibleTTSService): if settings is not None: default_settings.apply_update(settings) + # Convert Language enum to service-specific string + if isinstance(default_settings.language, Language): + default_settings.language = self.language_to_service_language(default_settings.language) + # Get model configuration (validates model exists) resolved_model = default_settings.model if resolved_model not in TTS_MODEL_CONFIGS: diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 85277a41b..613e35a28 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -297,9 +297,7 @@ class SonioxSTTService(WebsocketSTTService): await self._connect() async def _update_settings(self, delta: SonioxSTTSettings) -> dict[str, Any]: - """Apply settings delta. - - Settings are stored but not applied to the active connection. + """Apply settings delta and reconnect if anything changed. Args: delta: A settings delta. @@ -309,15 +307,9 @@ class SonioxSTTService(WebsocketSTTService): """ changed = await super()._update_settings(delta) - if not changed: - return changed - - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) + if changed: + await self._disconnect() + await self._connect() return changed diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 22b47f3fc..a93d9c78a 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -37,7 +37,7 @@ except ModuleNotFoundError as e: @dataclass class SpeechmaticsTTSSettings(TTSSettings): - """Settings for Speechmatics TTS service. + """Settings for SpeechmaticsTTSService. Parameters: max_retries: Maximum number of retries for HTTP requests. diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 539ddc88c..78ac6dfb8 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -70,7 +70,7 @@ def language_to_xtts_language(language: Language) -> Optional[str]: @dataclass class XTTSTTSSettings(TTSSettings): - """Settings for XTTS TTS service.""" + """Settings for XTTSService.""" pass @@ -124,6 +124,8 @@ class XTTSService(TTSService): _warn_deprecated_param("voice_id", XTTSTTSSettings, "voice") default_settings.voice = voice_id + # 3. (No step 3, as there's no params object to apply) + # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) diff --git a/tests/test_service_init.py b/tests/test_service_init.py index 67dcbb324..377300f64 100644 --- a/tests/test_service_init.py +++ b/tests/test_service_init.py @@ -34,7 +34,6 @@ new services are covered automatically with no per-service maintenance. import importlib import inspect import pkgutil -import warnings from dataclasses import fields import pytest