From 1cc69d475da52336161de2cc9c88318026de3008 Mon Sep 17 00:00:00 2001 From: Hwuiwon Kim Date: Wed, 19 Nov 2025 22:49:16 -0500 Subject: [PATCH 1/2] feat: Add speaking rate control to Inworld TTS service & fix param cases --- src/pipecat/services/inworld/tts.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index dc2282b91..ab218b3c0 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -146,6 +146,8 @@ class InworldTTSService(TTSService): Parameters: temperature: Voice temperature control for synthesis variability (e.g., 1.1). Valid range: [0, 2]. Higher values increase variability. + speaking_rate: Speaking speed control (range: [0.5, 1.5]). Defaults to 1.0 when + unset. Note: Language is automatically inferred from the input text by Inworld's TTS models, @@ -153,6 +155,7 @@ class InworldTTSService(TTSService): """ temperature: Optional[float] = None # optional temperature control (range: [0, 2]) + speaking_rate: Optional[float] = None # optional speaking rate control (range: [0.5, 1.5]) def __init__( self, @@ -198,6 +201,7 @@ class InworldTTSService(TTSService): - Other formats as supported by Inworld API params: Optional input parameters for additional configuration. Use this to specify: - temperature: Voice temperature control for variability (range: [0, 2], e.g., 1.1, optional) + - speaking_rate: Set desired speaking speed (range: [0.5, 1.5], optional) Language is automatically inferred from input text. **kwargs: Additional arguments passed to the parent TTSService class. @@ -228,15 +232,18 @@ class InworldTTSService(TTSService): self._settings = { "voiceId": voice_id, # Voice selection from direct parameter "modelId": model, # TTS model selection from direct parameter - "audio_config": { # Audio format configuration - "audio_encoding": encoding, # Format: LINEAR16, MP3, etc. - "sample_rate_hertz": 0, # Will be set in start() from parent service + "audioConfig": { # Audio format configuration + "audioEncoding": encoding, # Format: LINEAR16, MP3, etc. + "sampleRateHertz": 0, # Will be set in start() from parent service }, } # Add optional temperature parameter if provided (valid range: [0, 2]) if params and params.temperature is not None: self._settings["temperature"] = params.temperature + # Add optional speaking rate if provided (valid range: [0.5, 1.5]) + if params and params.speaking_rate is not None: + self._settings["audioConfig"]["speakingRate"] = params.speaking_rate # Register voice and model with parent service for metrics and tracking self.set_voice(voice_id) # Used for logging and metrics @@ -257,7 +264,7 @@ class InworldTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["audio_config"]["sample_rate_hertz"] = self.sample_rate + self._settings["audioConfig"]["sampleRateHertz"] = self.sample_rate async def stop(self, frame: EndFrame): """Stop the Inworld TTS service. @@ -323,9 +330,7 @@ class InworldTTSService(TTSService): "text": text, # Text to synthesize "voiceId": self._settings["voiceId"], # Voice selection (Ashley, Hades, etc.) "modelId": self._settings["modelId"], # TTS model (inworld-tts-1) - "audio_config": self._settings[ - "audio_config" - ], # Audio format settings (LINEAR16, 48kHz) + "audioConfig": self._settings["audioConfig"], # Audio format settings (LINEAR16, 48kHz) } # Add optional temperature parameter if configured (valid range: [0, 2]) From ead361f665f6e610a9a2f71af3a202837fc7ece8 Mon Sep 17 00:00:00 2001 From: Hwuiwon Kim Date: Thu, 20 Nov 2025 07:45:13 -0500 Subject: [PATCH 2/2] fix --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d69eb897a..5749e60da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added word-level timestamps support to Hume TTS service +- Added optional speaking rate control to `InworldTTSService`. + ### Changed - ⚠️ Breaking change: `LLMContext.create_image_message()`, @@ -89,6 +91,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prevented `HeyGenVideoService` from automatically disconnecting after 5 minutes. +- Fixed `InworldTTSService` audio config payload to use camelCase keys expected + by the Inworld API. + ## [0.0.94] - 2025-11-10 ### Changed