feat(inworld): default to inworld-tts-2

Flip the default Inworld TTS model from inworld-tts-1.5-max to inworld-tts-2 across InworldHttpTTSService (HTTP), InworldTTSService (WebSocket), and InworldRealtimeLLMService (cascade Realtime). The inworld-tts-1.5-max and inworld-tts-1.5-mini models remain valid options; existing users can pin the prior model explicitly via the model setting. Docstring examples are updated to reference the new default.
2026-05-05 09:20:16 -07:00
parent 83190d38e9
commit fe3af5d9f7
3 changed files with 7 additions and 7 deletions
--- a/src/pipecat/services/inworld/realtime/events.py
+++ b/src/pipecat/services/inworld/realtime/events.py
@@ -124,7 +124,7 @@ class AudioOutput(BaseModel):

    Parameters:
        format: The format configuration for output audio.
-        model: The TTS model to use (e.g. "inworld-tts-1.5-max").
+        model: The TTS model to use (e.g. "inworld-tts-2").
        voice: The voice ID to use (e.g. "Sarah", "Clive").
    """

--- a/src/pipecat/services/inworld/realtime/llm.py
+++ b/src/pipecat/services/inworld/realtime/llm.py
@@ -206,7 +206,7 @@ class InworldRealtimeLLMService(LLMService[InworldRealtimeLLMAdapter]):
            api_key=os.getenv("INWORLD_API_KEY"),
            llm_model="openai/gpt-4.1-nano",
            voice="Sarah",
-            tts_model="inworld-tts-1.5-max",
+            tts_model="inworld-tts-2",
        )

    For full control over session properties (note: ``session_properties``
@@ -231,7 +231,7 @@ class InworldRealtimeLLMService(LLMService[InworldRealtimeLLMAdapter]):
                        output=AudioOutput(
                            format=PCMAudioFormat(rate=24000),
                            voice="Sarah",
-                            model="inworld-tts-1.5-max",
+                            model="inworld-tts-2",
                        ),
                    ),
                ),
@@ -269,7 +269,7 @@ class InworldRealtimeLLMService(LLMService[InworldRealtimeLLMAdapter]):
                Shorthand for ``session_properties.model``.
            voice: Voice ID for TTS output (e.g. "Sarah", "Clive").
                Shorthand for ``session_properties.audio.output.voice``.
-            tts_model: TTS model to use (e.g. "inworld-tts-1.5-max").
+            tts_model: TTS model to use (e.g. "inworld-tts-2").
                Shorthand for ``session_properties.audio.output.model``.
            stt_model: STT model for input transcription
                (e.g. "assemblyai/universal-streaming-multilingual").
@@ -286,7 +286,7 @@ class InworldRealtimeLLMService(LLMService[InworldRealtimeLLMAdapter]):
        """
        default_model = llm_model or "openai/gpt-4.1-mini"
        default_voice = voice or "Clive"
-        default_tts_model = tts_model or "inworld-tts-1.5-max"
+        default_tts_model = tts_model or "inworld-tts-2"
        default_stt_model = stt_model or "assemblyai/u3-rt-pro"

        default_settings = self.Settings(
--- a/src/pipecat/services/inworld/tts.py
+++ b/src/pipecat/services/inworld/tts.py
@@ -162,7 +162,7 @@ class InworldHttpTTSService(TTSService):
        """
        # 1. Initialize default_settings with hardcoded defaults
        default_settings = self.Settings(
-            model="inworld-tts-1.5-max",
+            model="inworld-tts-2",
            voice="Ashley",
            language=None,
            speaking_rate=None,
@@ -604,7 +604,7 @@ class InworldTTSService(WebsocketTTSService):

        # 1. Initialize default_settings with hardcoded defaults
        default_settings = self.Settings(
-            model="inworld-tts-1.5-max",
+            model="inworld-tts-2",
            voice="Ashley",
            language=None,
            speaking_rate=None,