[inworld] default to using PCM encoding

* server returns audio bytes without headers
2026-05-07 14:49:57 -07:00
parent 3722ee223c
commit 406f8b730b
2 changed files with 5 additions and 4 deletions
--- a/changelog/4446.change.md
+++ b/changelog/4446.change.md
@@ -0,0 +1 @@
+- Changed the default `encoding` of `InworldHttpTTSService` and `InworldTTSService` from `LINEAR16` to `PCM`; with PCM encoding the server returns audio bytes without headers.
--- a/src/pipecat/services/inworld/tts.py
+++ b/src/pipecat/services/inworld/tts.py
@@ -94,7 +94,7 @@ class InworldHttpTTSService(TTSService):
    """Inworld AI HTTP-based TTS service.

    Supports both streaming and non-streaming modes via the `streaming` parameter.
-    Outputs LINEAR16 audio at configurable sample rates with word-level timestamps.
+    Outputs PCM audio at configurable sample rates with word-level timestamps.
    """

    Settings = InworldTTSSettings
@@ -125,7 +125,7 @@ class InworldHttpTTSService(TTSService):
        model: str | None = None,
        streaming: bool = True,
        sample_rate: int | None = None,
-        encoding: str = "LINEAR16",
+        encoding: str = "PCM",
        timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC",
        params: InputParams | None = None,
        settings: Settings | None = None,
@@ -505,7 +505,7 @@ class InworldTTSService(WebsocketTTSService):
    """Inworld AI WebSocket-based TTS service.

    Uses bidirectional WebSocket for lower latency streaming. Supports multiple
-    independent audio contexts per connection (max 5). Outputs LINEAR16 audio
+    independent audio contexts per connection (max 5). Outputs PCM audio
    with word-level timestamps.
    """

@@ -548,7 +548,7 @@ class InworldTTSService(WebsocketTTSService):
        model: str | None = None,
        url: str = "wss://api.inworld.ai/tts/v1/voice:streamBidirectional",
        sample_rate: int | None = None,
-        encoding: str = "LINEAR16",
+        encoding: str = "PCM",
        auto_mode: bool | None = None,
        apply_text_normalization: str | None = None,
        timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC",
				`@@ -0,0 +1 @@`
				- Changed the default `encoding` of `InworldHttpTTSService` and `InworldTTSService` from `LINEAR16` to `PCM`; with PCM encoding the server returns audio bytes without headers.