Set vad_force_turn_endpoint to True by default in SonioxSTTService

2026-02-10 09:16:03 -05:00
parent d47d95e1f0
commit 88e981c013
3 changed files with 4 additions and 3 deletions
--- a/changelog/3697.changed.2.md
+++ b/changelog/3697.changed.2.md
@@ -0,0 +1 @@
+- Update `SonioxSTTService` to default `vad_force_turn_endpoint` to `True`. This setting disables the turn detection logic available natively in Soniox. Instead, Soniox relies on a local VAD to finalize the transcript. This configuration meaningfully reduces the time to final segment for Soniox: with this setting enabled, Soniox outputs a transcript in ~250ms (median). Pipecat enables smart-turn detection by default using the `LocalSmartTurnAnalyzerV3`. To use the native turn detection logic in Soniox instead, set `vad_force_turn_endpoint` to `False`.
--- a/examples/foundational/07za-interruptible-soniox.py
+++ b/examples/foundational/07za-interruptible-soniox.py
@@ -53,7 +53,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    stt = SonioxSTTService(
        api_key=os.getenv("SONIOX_API_KEY"),
-        vad_force_turn_endpoint=True,
        params=SonioxInputParams(
            language_hints=[Language.EN],
            language_hints_strict=True,
--- a/src/pipecat/services/soniox/stt.py
+++ b/src/pipecat/services/soniox/stt.py
@@ -152,7 +152,7 @@ class SonioxSTTService(WebsocketSTTService):
        url: str = "wss://stt-rt.soniox.com/transcribe-websocket",
        sample_rate: Optional[int] = None,
        params: Optional[SonioxInputParams] = None,
-        vad_force_turn_endpoint: bool = False,
+        vad_force_turn_endpoint: bool = True,
        ttfs_p99_latency: Optional[float] = SONIOX_TTFS_P99,
        **kwargs,
    ):
@@ -164,7 +164,8 @@ class SonioxSTTService(WebsocketSTTService):
            sample_rate: Audio sample rate.
            params: Additional configuration parameters, such as language hints, context and
                speaker diarization.
-            vad_force_turn_endpoint: Listen to `VADUserStoppedSpeakingFrame` to send finalize message to Soniox. If disabled, Soniox will detect the end of the speech.
+            vad_force_turn_endpoint: Listen to `VADUserStoppedSpeakingFrame` to send finalize message to Soniox.
+                If disabled, Soniox will detect the end of the speech. Defaults to True.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to the STTService.
				`@@ -0,0 +1 @@`
				- Update `SonioxSTTService` to default `vad_force_turn_endpoint` to `True`. This setting disables the turn detection logic available natively in Soniox. Instead, Soniox relies on a local VAD to finalize the transcript. This configuration meaningfully reduces the time to final segment for Soniox: with this setting enabled, Soniox outputs a transcript in ~250ms (median). Pipecat enables smart-turn detection by default using the `LocalSmartTurnAnalyzerV3`. To use the native turn detection logic in Soniox instead, set `vad_force_turn_endpoint` to `False`.