Merge pull request #3941 from pipecat-ai/mb/stt-settings-updates

STT services: settings and examples fixes
2026-03-06 15:21:30 -05:00
parent 1a1c5668de 7d41049b35
commit d59c0ea6c1
38 changed files with 125 additions and 104 deletions
--- a/COMMUNITY_INTEGRATIONS.md
+++ b/COMMUNITY_INTEGRATIONS.md
@@ -233,14 +233,14 @@ def can_generate_metrics(self) -> bool:

 ### Service Settings

-Every STT, LLM, TTS, and image-generation service exposes a **Settings dataclass** that serves two roles:
+Every AI service (STT, LLM, TTS, image generation, etc.) exposes a **Settings dataclass** that serves two roles:

 1. **Store mode** — the service's `self._settings` holds the current value of every runtime-updatable field.
-2. **Delta mode** — an update frame carries only the fields that changed; unset fields remain `NOT_GIVEN`.
+2. **Delta mode** — an update frame (e.g. `TTSUpdateSettingsFrame`) specifies only the fields that should change; unspecified fields remain `NOT_GIVEN`.

 #### Defining your Settings class

-Extend `STTSettings`, `TTSSettings`, `LLMSettings`, or `ImageGenSettings`. The base classes already provide common fields (e.g. `model`, `voice`, `language`). You only need to add **service-specific knobs that should be runtime-updatable**:
+Extend `STTSettings`, `TTSSettings`, `LLMSettings`, or `ImageGenSettings` (or `ServiceSettings`, if your service subclasses `AIService` directly). The base classes already provide common fields (e.g. `model`, `voice`, `language`). You only need to add **service-specific knobs that should be runtime-updatable**:

 ```python
 from dataclasses import dataclass, field
@@ -320,7 +320,7 @@ svc = MyTTSService(

 #### Reacting to runtime changes

-STT, LLM, and TTS services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`).
+AI services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`).

 To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like:

--- a/examples/foundational/07f-interruptible-azure-http.py
+++ b/examples/foundational/07f-interruptible-azure-http.py
@@ -65,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = AzureLLMService(
        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
        settings=AzureLLMSettings(
+            model=os.getenv("AZURE_CHATGPT_MODEL"),
            system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        ),
    )
--- a/examples/foundational/07f-interruptible-azure.py
+++ b/examples/foundational/07f-interruptible-azure.py
@@ -65,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = AzureLLMService(
        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
        settings=AzureLLMSettings(
+            model=os.getenv("AZURE_CHATGPT_MODEL"),
            system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        ),
    )
--- a/examples/foundational/07m-interruptible-aws.py
+++ b/examples/foundational/07m-interruptible-aws.py
@@ -63,9 +63,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = AWSBedrockLLMService(
        aws_region="us-west-2",
-        model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
-        params=AWSBedrockLLMService.InputParams(temperature=0.8),
        settings=AWSBedrockLLMSettings(
+            model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
+            temperature=0.8,
            system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        ),
    )
--- a/examples/foundational/07n-interruptible-google.py
+++ b/examples/foundational/07n-interruptible-google.py
@@ -55,8 +55,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    stt = GoogleSTTService(
        settings=GoogleSTTSettings(
-            languages=Language.EN_US,
-            model="chirp_3",
+            languages=[Language.EN_US],
+            # Add model to use a specific model
+            # model="chirp_3",
        ),
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
        location="us",
--- a/examples/foundational/07o-interruptible-assemblyai-turn-detection.py
+++ b/examples/foundational/07o-interruptible-assemblyai-turn-detection.py
@@ -94,7 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        api_key=os.getenv("ASSEMBLYAI_API_KEY"),
        vad_force_turn_endpoint=False,  # Use AssemblyAI's built-in turn detection
        settings=AssemblyAISTTSettings(
-            speech_model="u3-rt-pro",
+            model="u3-rt-pro",
            # Optional: Tune turn detection timing (defaults shown below)
            # min_turn_silence=100,  # Default
            # max_turn_silence=1000,  # Default
--- a/examples/foundational/07za-interruptible-soniox.py
+++ b/examples/foundational/07za-interruptible-soniox.py
@@ -51,13 +51,13 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = (
-        SonioxSTTService(
-            api_key=os.getenv("SONIOX_API_KEY"),
-            settings=SonioxSTTSettings(
-                language_hints=[Language.EN],
-                language_hints_strict=True,
-            ),
+    stt = SonioxSTTService(
+        api_key=os.getenv("SONIOX_API_KEY"),
+        settings=SonioxSTTSettings(
+            # Language hints request a specific language;
+            # strict mode enforces those hints
+            language_hints=[Language.EN],
+            language_hints_strict=True,
        ),
    )

--- a/examples/foundational/14d-function-calling-aws-video.py
+++ b/examples/foundational/14d-function-calling-aws-video.py
@@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = AWSBedrockLLMService(
        aws_region="us-west-2",
        settings=AWSBedrockLLMSettings(
-            model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+            model="us.anthropic.claude-sonnet-4-6",
            # Note: usually, prefer providing latency="optimized" param.
            # This example originally omitted it because AWS Bedrock didn't
            # support it for Claude 3.7, which was needed for image input.
            # NOTE(review): confirm whether this still applies to the model above.
@@ -170,7 +170,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        context.add_message(
            {
                "role": "user",
-                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+                "content": f"Please introduce yourself to the user briefly; don't mention the camera. Use '{client_id}' as the user ID during function calls.",
            }
        )
        await task.queue_frames([LLMRunFrame()])
--- a/examples/foundational/55zzn-update-settings-groq-stt.py
+++ b/examples/foundational/55zzn-update-settings-groq-stt.py
@@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
-        context.add_message({"user": "system", "content": "Please introduce yourself to the user."})
+        context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        await asyncio.sleep(10)
--- a/src/pipecat/processors/aggregators/llm_context.py
+++ b/src/pipecat/processors/aggregators/llm_context.py
@@ -255,7 +255,7 @@ class LLMContext:
        this method, which is part of the public API of OpenAILLMContext but
        doesn't need to be for LLMContext.

-        .. deprecated::
+        .. deprecated:: 0.0.92
            Use `get_messages()` instead.

        Returns:
--- a/src/pipecat/processors/user_idle_processor.py
+++ b/src/pipecat/processors/user_idle_processor.py
@@ -27,7 +27,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 class UserIdleProcessor(FrameProcessor):
    """Monitors user inactivity and triggers callbacks after timeout periods.

-    .. deprecated::
+    .. deprecated:: 0.0.100
        UserIdleProcessor is deprecated in 0.0.100 and will be removed in a future version.
        Use LLMUserAggregator with user_idle_timeout parameter instead.

--- a/src/pipecat/services/anthropic/llm.py
+++ b/src/pipecat/services/anthropic/llm.py
@@ -170,7 +170,7 @@ class AnthropicLLMService(LLMService):
    class InputParams(BaseModel):
        """Input parameters for Anthropic model inference.

-        .. deprecated::
+        .. deprecated:: 0.0.105
            Use ``AnthropicLLMSettings`` instead. Pass settings directly via the
            ``settings`` parameter of :class:`AnthropicLLMService`.

@@ -231,12 +231,12 @@ class AnthropicLLMService(LLMService):
            api_key: Anthropic API key for authentication.
            model: Model name to use.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AnthropicLLMSettings(model=...)`` instead.

            params: Optional model parameters for inference.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AnthropicLLMSettings(...)`` instead.

            settings: Runtime-updatable settings for this service.  When both
--- a/src/pipecat/services/assemblyai/stt.py
+++ b/src/pipecat/services/assemblyai/stt.py
@@ -81,7 +81,7 @@ def map_language_from_assemblyai(language_code: str) -> Language:

@dataclass
 class AssemblyAISTTSettings(STTSettings):
-    """Settings for the AssemblyAI STT service.
+    """Settings for AssemblyAISTTService.

    Parameters:
        formatted_finals: Whether to enable transcript formatting.
@@ -99,6 +99,8 @@ class AssemblyAISTTSettings(STTSettings):
        language_detection: Enable automatic language detection.
        format_turns: Whether to format transcript turns.
        speaker_labels: Enable speaker diarization.
+        vad_threshold: VAD confidence threshold (0.0–1.0) for classifying
+            audio frames as silence. Only applicable to u3-rt-pro.
    """

    formatted_finals: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -115,6 +117,7 @@ class AssemblyAISTTSettings(STTSettings):
    language_detection: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    format_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    speaker_labels: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    vad_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)


 class AssemblyAISTTService(WebsocketSTTService):
@@ -199,6 +202,7 @@ class AssemblyAISTTService(WebsocketSTTService):
            language_detection=None,
            format_turns=True,
            speaker_labels=None,
+            vad_threshold=None,
        )

        # 2. Apply direct init arg overrides (deprecated)
@@ -227,6 +231,7 @@ class AssemblyAISTTService(WebsocketSTTService):
                default_settings.language_detection = connection_params.language_detection
                default_settings.format_turns = connection_params.format_turns
                default_settings.speaker_labels = connection_params.speaker_labels
+                default_settings.vad_threshold = connection_params.vad_threshold

        # 4. Apply settings delta (canonical API, always wins)
        if settings is not None:
@@ -463,6 +468,7 @@ class AssemblyAISTTService(WebsocketSTTService):
            "language_detection": s.language_detection,
            "format_turns": s.format_turns,
            "speaker_labels": s.speaker_labels,
+            "vad_threshold": s.vad_threshold,
        }

        for k, v in optional_fields.items():
@@ -651,7 +657,7 @@ class AssemblyAISTTService(WebsocketSTTService):
        await self.start_processing_metrics()
        await self.broadcast_frame(UserStartedSpeakingFrame)
        if self._should_interrupt:
-            await self.push_interruption_task_frame_and_wait()
+            await self.broadcast_interruption()
        self._user_speaking = True

    async def _handle_termination(self, message: TerminationMessage):
--- a/src/pipecat/services/aws/llm.py
+++ b/src/pipecat/services/aws/llm.py
@@ -754,7 +754,7 @@ class AWSBedrockLLMService(LLMService):
    class InputParams(BaseModel):
        """Input parameters for AWS Bedrock LLM service.

-        .. deprecated::
+        .. deprecated:: 0.0.105
            Use ``AWSBedrockLLMSettings`` instead. Pass settings directly via the
            ``settings`` parameter of :class:`AWSBedrockLLMService`.

@@ -795,7 +795,7 @@ class AWSBedrockLLMService(LLMService):
        Args:
            model: The AWS Bedrock model identifier to use.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AWSBedrockLLMSettings(model=...)`` instead.

            aws_access_key: AWS access key ID. If None, uses default credentials.
@@ -804,7 +804,7 @@ class AWSBedrockLLMService(LLMService):
            aws_region: AWS region for the Bedrock service.
            params: Model parameters and configuration.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AWSBedrockLLMSettings(...)`` instead.

            settings: Runtime-updatable settings for this service.  When both
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
@@ -280,7 +280,7 @@ class AWSNovaSonicLLMService(LLMService):
                - Nova Sonic (the older model): "us-east-1", "ap-northeast-1"
            model: Model identifier. Defaults to "amazon.nova-2-sonic-v1:0".

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AWSNovaSonicLLMSettings(model=...)`` instead.

            voice_id: Voice ID for speech synthesis.
@@ -289,7 +289,7 @@ class AWSNovaSonicLLMService(LLMService):
                - Nova 2 Sonic (the default model): see https://docs.aws.amazon.com/nova/latest/nova2-userguide/sonic-language-support.html
                - Nova Sonic (the older model): see https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=AWSNovaSonicLLMSettings(voice=...)`` instead.

            params: Model parameters for audio configuration and inference.
--- a/src/pipecat/services/aws/stt.py
+++ b/src/pipecat/services/aws/stt.py
@@ -47,7 +47,7 @@ except ModuleNotFoundError as e:

@dataclass
 class AWSTranscribeSTTSettings(STTSettings):
-    """Settings for the AWS Transcribe STT service."""
+    """Settings for AWSTranscribeSTTService."""

    pass

@@ -99,13 +99,13 @@ class AWSTranscribeSTTService(WebsocketSTTService):
        # 1. Initialize default_settings with hardcoded defaults
        default_settings = AWSTranscribeSTTSettings(
            model=None,
-            language=self.language_to_service_language(Language.EN) or "en-US",
+            language=self.language_to_service_language(Language.EN),
        )

        # 2. Apply direct init arg overrides (deprecated)
        if language is not None:
            _warn_deprecated_param("language", AWSTranscribeSTTSettings, "language")
-            default_settings.language = self.language_to_service_language(language) or "en-US"
+            default_settings.language = self.language_to_service_language(language)

        # 3. No params to apply

--- a/src/pipecat/services/azure/stt.py
+++ b/src/pipecat/services/azure/stt.py
@@ -53,7 +53,7 @@ except ModuleNotFoundError as e:

@dataclass
 class AzureSTTSettings(STTSettings):
-    """Settings for the Azure STT service."""
+    """Settings for AzureSTTService."""

    pass

--- a/src/pipecat/services/cartesia/stt.py
+++ b/src/pipecat/services/cartesia/stt.py
@@ -46,7 +46,7 @@ except ModuleNotFoundError as e:

@dataclass
 class CartesiaSTTSettings(STTSettings):
-    """Settings for the Cartesia STT service."""
+    """Settings for CartesiaSTTService."""

    pass

--- a/src/pipecat/services/deepgram/flux/stt.py
+++ b/src/pipecat/services/deepgram/flux/stt.py
@@ -71,7 +71,7 @@ class FluxEventType(str, Enum):

@dataclass
 class DeepgramFluxSTTSettings(STTSettings):
-    """Settings for the Deepgram Flux STT service.
+    """Settings for DeepgramFluxSTTService.

    Parameters:
        eager_eot_threshold: EagerEndOfTurn/TurnResumed threshold. Off by default.
@@ -81,7 +81,6 @@ class DeepgramFluxSTTSettings(STTSettings):
        eot_timeout_ms: Time in ms after speech to finish a turn regardless of EOT
            confidence (default 5000).
        keyterm: Keyterms to boost recognition accuracy for specialized terminology.
-        tag: Tags to label requests for identification during usage reporting.
        min_confidence: Minimum confidence required to create a TranscriptionFrame.
    """

@@ -89,7 +88,6 @@ class DeepgramFluxSTTSettings(STTSettings):
    eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    eot_timeout_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    keyterm: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
-    tag: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    min_confidence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)


@@ -157,6 +155,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
        mip_opt_out: Optional[bool] = None,
        model: Optional[str] = None,
        flux_encoding: str = "linear16",
+        tag: Optional[list] = None,
        params: Optional[InputParams] = None,
        should_interrupt: bool = True,
        settings: Optional[DeepgramFluxSTTSettings] = None,
@@ -177,6 +176,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):

            flux_encoding: Audio encoding format required by Flux API. Must be "linear16".
                Raw signed little-endian 16-bit PCM encoding.
+            tag: Tags to label requests for identification during usage reporting.
            params: InputParams instance containing detailed API configuration options.

                .. deprecated:: 0.0.105
@@ -224,7 +224,6 @@ class DeepgramFluxSTTService(WebsocketSTTService):
            eot_threshold=None,
            eot_timeout_ms=None,
            keyterm=[],
-            tag=[],
            min_confidence=None,
        )

@@ -241,7 +240,8 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                default_settings.eot_threshold = params.eot_threshold
                default_settings.eot_timeout_ms = params.eot_timeout_ms
                default_settings.keyterm = params.keyterm or []
-                default_settings.tag = params.tag or []
+                if params.tag and tag is None:
+                    tag = params.tag
                default_settings.min_confidence = params.min_confidence
                if params.mip_opt_out is not None:
                    mip_opt_out = params.mip_opt_out
@@ -261,6 +261,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
        self._should_interrupt = should_interrupt
        self._encoding = flux_encoding
        self._mip_opt_out = mip_opt_out
+        self._tag = tag or []
        self._websocket_url = None
        self._receive_task = None

@@ -469,7 +470,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
            url_params.append(urlencode({"keyterm": keyterm}))

        # Add tag parameters (can have multiple)
-        for tag_value in self._settings.tag:
+        for tag_value in self._tag:
            url_params.append(urlencode({"tag": tag_value}))

        self._websocket_url = f"{self._url}?{'&'.join(url_params)}"
--- a/src/pipecat/services/deepgram/stt.py
+++ b/src/pipecat/services/deepgram/stt.py
@@ -177,7 +177,7 @@ class LiveOptions:

@dataclass
 class DeepgramSTTSettings(STTSettings):
-    """Settings for Deepgram STT services.
+    """Settings for DeepgramSTTService.

    ``model`` and ``language`` are inherited from ``STTSettings`` /
    ``ServiceSettings``.  Additional Deepgram connection params may
--- a/src/pipecat/services/elevenlabs/stt.py
+++ b/src/pipecat/services/elevenlabs/stt.py
@@ -179,19 +179,19 @@ class CommitStrategy(str, Enum):

@dataclass
 class ElevenLabsSTTSettings(STTSettings):
-    """Settings for the ElevenLabs file-based STT service.
+    """Settings for ElevenLabsSTTService.

    Parameters:
        tag_audio_events: Whether to include audio events like (laughter),
            (coughing) in the transcription.
    """

-    tag_audio_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    tag_audio_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)


@dataclass
 class ElevenLabsRealtimeSTTSettings(STTSettings):
-    """Settings for the ElevenLabs Realtime STT service.
+    """Settings for ElevenLabsRealtimeSTTService.

    See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions.

@@ -277,8 +277,8 @@ class ElevenLabsSTTService(SegmentedSTTService):
        # 1. Initialize default_settings with hardcoded defaults
        default_settings = ElevenLabsSTTSettings(
            model="scribe_v2",
-            language="eng",
-            tag_audio_events=True,
+            language=language_to_elevenlabs_language(Language.EN),
+            tag_audio_events=None,
        )

        # 2. Apply direct init arg overrides (deprecated)
@@ -291,9 +291,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
            _warn_deprecated_param("params", ElevenLabsSTTSettings)
            if not settings:
                if params.language is not None:
-                    default_settings.language = (
-                        self.language_to_service_language(params.language) or "eng"
-                    )
+                    default_settings.language = language_to_elevenlabs_language(params.language)
                default_settings.tag_audio_events = params.tag_audio_events

        # 4. Apply settings delta (canonical API, always wins)
@@ -354,10 +352,11 @@ class ElevenLabsSTTService(SegmentedSTTService):
            content_type="audio/x-wav",
        )

-        # Add required model_id, language_code, and tag_audio_events
+        # Add required model_id and language_code
        data.add_field("model_id", self._settings.model)
        data.add_field("language_code", self._settings.language)
-        data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower())
+        if self._settings.tag_audio_events is not None:
+            data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower())

        async with self._session.post(url, data=data, headers=headers) as response:
            if response.status != 200:
--- a/src/pipecat/services/fal/stt.py
+++ b/src/pipecat/services/fal/stt.py
@@ -143,7 +143,7 @@ def language_to_fal_language(language: Language) -> Optional[str]:

@dataclass
 class FalSTTSettings(STTSettings):
-    """Settings for the Fal Wizper STT service."""
+    """Settings for FalSTTService."""

    pass

@@ -215,7 +215,7 @@ class FalSTTService(SegmentedSTTService):
        # 1. Initialize default_settings with hardcoded defaults
        default_settings = FalSTTSettings(
            model=None,
-            language=language_to_fal_language(Language.EN) or "en",
+            language=language_to_fal_language(Language.EN),
        )

        # 2. (no deprecated direct args for this service)
@@ -224,9 +224,8 @@ class FalSTTService(SegmentedSTTService):
        if params is not None:
            _warn_deprecated_param("params", FalSTTSettings)
            if not settings:
-                default_settings.language = (
-                    language_to_fal_language(params.language) if params.language else "en"
-                )
+                if params.language is not None:
+                    default_settings.language = language_to_fal_language(params.language)
                if params.task != "transcribe":
                    task = params.task
                if params.chunk_level != "segment":
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -188,7 +188,7 @@ class _InputParamsDescriptor:

@dataclass
 class GladiaSTTSettings(STTSettings):
-    """Settings for Gladia STT service.
+    """Settings for GladiaSTTService.

    Parameters:
        language_config: Language detection and handling configuration.
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
@@ -552,7 +552,7 @@ class ContextWindowCompressionParams(BaseModel):
 class InputParams(BaseModel):
    """Input parameters for Gemini Live generation.

-    .. deprecated::
+    .. deprecated:: 0.0.105
        Use ``GeminiLiveLLMSettings`` instead.

    Parameters:
@@ -678,7 +678,7 @@ class GeminiLiveLLMService(LLMService):

            model: Model identifier to use.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GeminiLiveLLMSettings(model=...)`` instead.

            voice_id: TTS voice identifier. Defaults to "Charon".
@@ -691,7 +691,7 @@ class GeminiLiveLLMService(LLMService):
            tools: Tools/functions available to the model. Defaults to None.
            params: Configuration parameters for the model.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GeminiLiveLLMSettings(...)`` instead.

            settings: Gemini Live LLM settings. If provided together with deprecated
--- a/src/pipecat/services/google/gemini_live/llm_vertex.py
+++ b/src/pipecat/services/google/gemini_live/llm_vertex.py
@@ -88,7 +88,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
            project_id: Google Cloud project ID.
            model: Model identifier to use.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GeminiLiveLLMSettings(model=...)`` instead.

            voice_id: TTS voice identifier. Defaults to "Charon".
@@ -102,7 +102,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
            params: Configuration parameters for the model along with Vertex AI
                location and project ID.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GeminiLiveLLMSettings(...)`` instead.

            settings: Gemini Live LLM settings. If provided together with deprecated
--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -754,7 +754,7 @@ class GoogleLLMService(LLMService):
    class InputParams(BaseModel):
        """Input parameters for Google AI models.

-        .. deprecated::
+        .. deprecated:: 0.0.105
            Use ``settings=GoogleLLMSettings(...)`` instead.

        Parameters:
@@ -797,12 +797,12 @@ class GoogleLLMService(LLMService):
            api_key: Google AI API key for authentication.
            model: Model name to use.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GoogleLLMSettings(model=...)`` instead.

            params: Optional model parameters for inference.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GoogleLLMSettings(...)`` instead.

            settings: Runtime-updatable settings for this service.  When both
--- a/src/pipecat/services/google/llm_vertex.py
+++ b/src/pipecat/services/google/llm_vertex.py
@@ -128,14 +128,14 @@ class GoogleVertexLLMService(GoogleLLMService):
            credentials_path: Path to the service account JSON file.
            model: Model identifier (e.g., "gemini-2.5-flash").

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GoogleLLMSettings(model=...)`` instead.

            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
            project_id: Google Cloud project ID.
            params: Input parameters for the model.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=GoogleLLMSettings(...)`` instead.

            settings: Runtime-updatable settings for this service.  When both
--- a/src/pipecat/services/google/stt.py
+++ b/src/pipecat/services/google/stt.py
@@ -360,7 +360,7 @@ def language_to_google_stt_language(language: Language) -> Optional[str]:

@dataclass
 class GoogleSTTSettings(STTSettings):
-    """Settings for Google Cloud Speech-to-Text V2.
+    """Settings for GoogleSTTService.

    Parameters:
        languages: List of ``Language`` enums for recognition
@@ -653,7 +653,7 @@ class GoogleSTTService(STTService):
    async def set_languages(self, languages: List[Language]):
        """Update the service's recognition languages.

-        .. deprecated::
+        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(languages=...)``
            instead.

--- a/src/pipecat/services/google/tts.py
+++ b/src/pipecat/services/google/tts.py
@@ -482,7 +482,7 @@ def language_to_gemini_tts_language(language: Language) -> Optional[str]:

@dataclass
 class GoogleHttpTTSSettings(TTSSettings):
-    """Settings for Google HTTP TTS service.
+    """Settings for GoogleHttpTTSService.

    Parameters:
        pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
@@ -512,8 +512,8 @@ class GoogleHttpTTSSettings(TTSSettings):


@dataclass
-class GoogleStreamTTSSettings(TTSSettings):
-    """Settings for Google streaming TTS service.
+class GoogleTTSSettings(TTSSettings):
+    """Settings for GoogleTTSService.

    Parameters:
        speaking_rate: The speaking rate, in the range [0.25, 2.0].
@@ -522,9 +522,14 @@ class GoogleStreamTTSSettings(TTSSettings):
    speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)


+#: .. deprecated:: 0.0.105
+#:     Use ``GoogleTTSSettings`` instead.
+GoogleStreamTTSSettings = GoogleTTSSettings
+
+
@dataclass
 class GeminiTTSSettings(TTSSettings):
-    """Settings for Gemini TTS service.
+    """Settings for GeminiTTSService.

    Parameters:
        prompt: Optional style instructions for how to synthesize the content.
@@ -619,6 +624,13 @@ class GoogleHttpTTSService(TTSService):
            model=None,
            voice="en-US-Chirp3-HD-Charon",
            language="en-US",
+            pitch=None,
+            rate=None,
+            speaking_rate=None,
+            volume=None,
+            emphasis=None,
+            gender=None,
+            google_style=None,
        )

        # 2. Apply direct init arg overrides (deprecated)
@@ -1008,13 +1020,13 @@ class GoogleTTSService(GoogleBaseTTSService):
        )
    """

-    _settings: GoogleStreamTTSSettings
+    _settings: GoogleTTSSettings

    class InputParams(BaseModel):
        """Input parameters for Google streaming TTS configuration.

        .. deprecated:: 0.0.105
-            Use ``GoogleStreamTTSSettings`` directly via the ``settings`` parameter instead.
+            Use ``GoogleTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
@@ -1034,7 +1046,7 @@ class GoogleTTSService(GoogleBaseTTSService):
        voice_cloning_key: Optional[str] = None,
        sample_rate: Optional[int] = None,
        params: Optional[InputParams] = None,
-        settings: Optional[GoogleStreamTTSSettings] = None,
+        settings: Optional[GoogleTTSSettings] = None,
        **kwargs,
    ):
        """Initializes the Google streaming TTS service.
@@ -1046,34 +1058,35 @@ class GoogleTTSService(GoogleBaseTTSService):
            voice_id: Google TTS voice identifier (e.g., "en-US-Chirp3-HD-Charon").

                .. deprecated:: 0.0.105
-                    Use ``settings=GoogleStreamTTSSettings(voice=...)`` instead.
+                    Use ``settings=GoogleTTSSettings(voice=...)`` instead.

            voice_cloning_key: The voice cloning key for Chirp 3 custom voices.
            sample_rate: Audio sample rate in Hz. If None, uses default.
            params: Language configuration parameters.

                .. deprecated:: 0.0.105
-                    Use ``settings=GoogleStreamTTSSettings(...)`` instead.
+                    Use ``settings=GoogleTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        """
        # 1. Initialize default_settings with hardcoded defaults
-        default_settings = GoogleStreamTTSSettings(
+        default_settings = GoogleTTSSettings(
            model=None,
            voice="en-US-Chirp3-HD-Charon",
            language="en-US",
+            speaking_rate=None,
        )

        # 2. Apply direct init arg overrides (deprecated)
        if voice_id is not None:
-            _warn_deprecated_param("voice_id", GoogleStreamTTSSettings, "voice")
+            _warn_deprecated_param("voice_id", GoogleTTSSettings, "voice")
            default_settings.voice = voice_id

        # 3. Apply params overrides — only if settings not provided
        if params is not None:
-            _warn_deprecated_param("params", GoogleStreamTTSSettings)
+            _warn_deprecated_param("params", GoogleTTSSettings)
            if not settings:
                if params.language is not None:
                    default_settings.language = self.language_to_service_language(params.language)
@@ -1104,7 +1117,7 @@ class GoogleTTSService(GoogleBaseTTSService):
        Args:
            delta: Settings delta. Can include 'speaking_rate' (float).
        """
-        if isinstance(delta, GoogleStreamTTSSettings) and is_given(delta.speaking_rate):
+        if isinstance(delta, GoogleTTSSettings) and is_given(delta.speaking_rate):
            rate_value = float(delta.speaking_rate)
            if not (0.25 <= rate_value <= 2.0):
                logger.warning(
@@ -1308,6 +1321,9 @@ class GeminiTTSService(GoogleBaseTTSService):
            model="gemini-2.5-flash-tts",
            voice="Kore",
            language="en-US",
+            prompt=None,
+            multi_speaker=False,
+            speaker_configs=None,
        )

        # 2. Apply direct init arg overrides (deprecated)
--- a/src/pipecat/services/gradium/stt.py
+++ b/src/pipecat/services/gradium/stt.py
@@ -68,7 +68,7 @@ def language_to_gradium_language(language: Language) -> Optional[str]:

@dataclass
 class GradiumSTTSettings(STTSettings):
-    """Settings for the Gradium STT service."""
+    """Settings for GradiumSTTService."""

    pass

--- a/src/pipecat/services/nvidia/stt.py
+++ b/src/pipecat/services/nvidia/stt.py
@@ -93,14 +93,14 @@ def language_to_nvidia_riva_language(language: Language) -> Optional[str]:

@dataclass
 class NvidiaSTTSettings(STTSettings):
-    """Settings for the NVIDIA Riva streaming STT service."""
+    """Settings for NvidiaSTTService."""

    pass


@dataclass
 class NvidiaSegmentedSTTSettings(STTSettings):
-    """Settings for the NVIDIA Riva segmented STT service.
+    """Settings for NvidiaSegmentedSTTService.

    Parameters:
        profanity_filter: Whether to filter profanity from results.
--- a/src/pipecat/services/openai/stt.py
+++ b/src/pipecat/services/openai/stt.py
@@ -182,7 +182,7 @@ _OPENAI_SAMPLE_RATE = 24000

@dataclass
 class OpenAIRealtimeSTTSettings(STTSettings):
-    """Settings for the OpenAI Realtime STT service.
+    """Settings for OpenAIRealtimeSTTService.

    Parameters:
        prompt: Optional prompt text to guide transcription style.
--- a/src/pipecat/services/openai_realtime_beta/openai.py
+++ b/src/pipecat/services/openai_realtime_beta/openai.py
@@ -134,7 +134,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
            api_key: OpenAI API key for authentication.
            model: OpenAI model name.

-                .. deprecated::
+                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIRealtimeBetaLLMSettings(model=...)`` instead.

            base_url: WebSocket base URL for the realtime API.
--- a/src/pipecat/services/sarvam/stt.py
+++ b/src/pipecat/services/sarvam/stt.py
@@ -139,7 +139,7 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = {

@dataclass
 class SarvamSTTSettings(STTSettings):
-    """Settings for the Sarvam STT service.
+    """Settings for SarvamSTTService.

    Parameters:
        prompt: Optional prompt to guide transcription/translation style/context.
@@ -414,7 +414,7 @@ class SarvamSTTService(STTService):
    async def set_prompt(self, prompt: Optional[str]):
        """Set the transcription/translation prompt and reconnect.

-        .. deprecated::
+        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...))`` instead.

        Args:
--- a/src/pipecat/services/soniox/stt.py
+++ b/src/pipecat/services/soniox/stt.py
@@ -141,7 +141,7 @@ def _prepare_language_hints(

@dataclass
 class SonioxSTTSettings(STTSettings):
-    """Settings for Soniox STT service.
+    """Settings for SonioxSTTService.

    Parameters:
        language_hints: List of language hints to use for transcription.
--- a/src/pipecat/services/speechmatics/stt.py
+++ b/src/pipecat/services/speechmatics/stt.py
@@ -85,12 +85,11 @@ class TurnDetectionMode(str, Enum):

@dataclass
 class SpeechmaticsSTTSettings(STTSettings):
-    """Settings for Speechmatics STT service.
+    """Settings for SpeechmaticsSTTService.

    See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field.

    Parameters:
-        model: The operating point / model name.
        domain: Domain for Speechmatics API.
        turn_detection_mode: Endpoint handling mode.
        speaker_active_format: Formatter for active speaker ID.
@@ -490,16 +489,16 @@ class SpeechmaticsSTTService(STTService):
                default_settings.prefer_current_speaker = _params.prefer_current_speaker
                default_settings.extra_params = _params.extra_params

-        # Build SDK config from settings, then resolve model from operating_point
+        # --- 4. Settings delta (canonical API, always wins) ---
+        if settings is not None:
+            default_settings.apply_update(settings)
+
+        # Build SDK config from settings, set model name before calling super
        self._client: VoiceAgentClient | None = None
        self._audio_encoding = encoding
        self._config: VoiceAgentConfig = self._build_config(default_settings)
        default_settings.model = self._config.operating_point.value

-        # --- 4. Settings delta (canonical API, always wins) ---
-        if settings is not None:
-            default_settings.apply_update(settings)
-
        super().__init__(
            sample_rate=sample_rate,
            ttfs_p99_latency=ttfs_p99_latency,
--- a/src/pipecat/services/whisper/base_stt.py
+++ b/src/pipecat/services/whisper/base_stt.py
@@ -28,7 +28,7 @@ from pipecat.utils.tracing.service_decorators import traced_stt

@dataclass
 class BaseWhisperSTTSettings(STTSettings):
-    """Settings for Whisper API-based STT services.
+    """Settings for BaseWhisperSTTService.

    Parameters:
        prompt: Optional text to guide the model's style or continue
--- a/src/pipecat/services/whisper/stt.py
+++ b/src/pipecat/services/whisper/stt.py
@@ -176,7 +176,7 @@ def language_to_whisper_language(language: Language) -> Optional[str]:

@dataclass
 class WhisperSTTSettings(STTSettings):
-    """Settings for the local Whisper (Faster Whisper) STT service.
+    """Settings for WhisperSTTService.

    Parameters:
        no_speech_prob: Probability threshold for filtering non-speech segments.
@@ -187,7 +187,7 @@ class WhisperSTTSettings(STTSettings):

@dataclass
 class WhisperMLXSTTSettings(STTSettings):
-    """Settings for the MLX Whisper STT service.
+    """Settings for WhisperMLXSTTService.

    Parameters:
        no_speech_prob: Probability threshold for filtering non-speech segments.