Add max_endpoint_delay_ms and clean up Soniox STT settings (#4521)

This commit is contained in:
mihafabcic-soniox
2026-05-20 23:54:48 +02:00
committed by GitHub
parent 105d6f27da
commit 86a5710801
2 changed files with 9 additions and 12 deletions

View File

@@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_response_universal import (
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.soniox.stt import SonioxSTTService
from pipecat.services.soniox.tts import SonioxTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
@@ -53,12 +53,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
stt = SonioxSTTService(api_key=os.environ["SONIOX_API_KEY"])
tts = CartesiaTTSService(
api_key=os.environ["CARTESIA_API_KEY"],
settings=CartesiaTTSService.Settings(
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
),
)
tts = SonioxTTSService(api_key=os.environ["SONIOX_API_KEY"])
llm = OpenAILLMService(
api_key=os.environ["OPENAI_API_KEY"],
@@ -103,9 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
await task.queue_frames([LLMRunFrame()])
await asyncio.sleep(10)
logger.info("Updating Soniox STT settings: language=es")
logger.info("Updating Soniox STT settings: language_hints=[es]")
await task.queue_frame(
STTUpdateSettingsFrame(delta=SonioxSTTService.Settings(language=Language.ES))
STTUpdateSettingsFrame(delta=SonioxSTTService.Settings(language_hints=[Language.ES]))
)
@transport.event_handler("on_client_disconnected")

View File

@@ -155,7 +155,6 @@ def language_to_soniox_language(language: Language) -> str:
Language.ID: "id",
Language.IT: "it",
Language.JA: "ja",
Language.KA: "ka",
Language.KK: "kk",
Language.KN: "kn",
Language.KO: "ko",
@@ -232,6 +231,7 @@ class SonioxSTTSettings(STTSettings):
context_version 2.
enable_speaker_diarization: Whether to enable speaker diarization.
enable_language_identification: Whether to enable language identification.
max_endpoint_delay_ms: Max ms before endpoint detection finalizes the turn (500-3000).
client_reference_id: Client reference ID to use for transcription.
"""
@@ -242,6 +242,7 @@ class SonioxSTTSettings(STTSettings):
enable_language_identification: bool | None | _NotGiven = field(
default_factory=lambda: NOT_GIVEN
)
max_endpoint_delay_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
client_reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -309,6 +310,7 @@ class SonioxSTTService(WebsocketSTTService):
context=None,
enable_speaker_diarization=False,
enable_language_identification=False,
max_endpoint_delay_ms=None,
client_reference_id=None,
)
@@ -390,8 +392,7 @@ class SonioxSTTService(WebsocketSTTService):
changed = await super()._update_settings(delta)
if changed:
await self._disconnect()
await self._connect()
await self._request_reconnect()
return changed
@@ -522,6 +523,7 @@ class SonioxSTTService(WebsocketSTTService):
"audio_format": self._audio_format,
"num_channels": self._num_channels,
"enable_endpoint_detection": enable_endpoint_detection,
"max_endpoint_delay_ms": s.max_endpoint_delay_ms,
"sample_rate": self.sample_rate,
"language_hints": _prepare_language_hints(assert_given(s.language_hints)),
"language_hints_strict": s.language_hints_strict,