From 406f8b730b2e6ea61adf891e5fec2534069def02 Mon Sep 17 00:00:00 2001 From: Ian Lee Date: Thu, 7 May 2026 14:49:57 -0700 Subject: [PATCH] [inworld] default to using PCM encoding * server returns audio bytes without headers --- changelog/4446.change.md | 1 + src/pipecat/services/inworld/tts.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog/4446.change.md diff --git a/changelog/4446.change.md b/changelog/4446.change.md new file mode 100644 index 000000000..20efc50d8 --- /dev/null +++ b/changelog/4446.change.md @@ -0,0 +1 @@ +- Updated `InworldHttpTTSService` and `InworldTTSService` to use PCM audio encoding by default, which returns audio bytes without headers. \ No newline at end of file diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 5c0194ff1..fc6b32f89 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -94,7 +94,7 @@ class InworldHttpTTSService(TTSService): """Inworld AI HTTP-based TTS service. Supports both streaming and non-streaming modes via the `streaming` parameter. - Outputs LINEAR16 audio at configurable sample rates with word-level timestamps. + Outputs PCM audio at configurable sample rates with word-level timestamps. """ Settings = InworldTTSSettings @@ -125,7 +125,7 @@ class InworldHttpTTSService(TTSService): model: str | None = None, streaming: bool = True, sample_rate: int | None = None, - encoding: str = "LINEAR16", + encoding: str = "PCM", timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC", params: InputParams | None = None, settings: Settings | None = None, @@ -505,7 +505,7 @@ class InworldTTSService(WebsocketTTSService): """Inworld AI WebSocket-based TTS service. Uses bidirectional WebSocket for lower latency streaming. Supports multiple - independent audio contexts per connection (max 5). Outputs LINEAR16 audio + independent audio contexts per connection (max 5). Outputs PCM audio with word-level timestamps. """ @@ -548,7 +548,7 @@ class InworldTTSService(WebsocketTTSService): model: str | None = None, url: str = "wss://api.inworld.ai/tts/v1/voice:streamBidirectional", sample_rate: int | None = None, - encoding: str = "LINEAR16", + encoding: str = "PCM", auto_mode: bool | None = None, apply_text_normalization: str | None = None, timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC",