diff --git a/examples/update-settings/stt/stt-soniox.py b/examples/update-settings/stt/stt-soniox.py index adf3fb547..db30db29d 100644 --- a/examples/update-settings/stt/stt-soniox.py +++ b/examples/update-settings/stt/stt-soniox.py @@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.soniox.stt import SonioxSTTService +from pipecat.services.soniox.tts import SonioxTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams @@ -53,12 +53,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = SonioxSTTService(api_key=os.environ["SONIOX_API_KEY"]) - tts = CartesiaTTSService( - api_key=os.environ["CARTESIA_API_KEY"], - settings=CartesiaTTSService.Settings( - voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ), - ) + tts = SonioxTTSService(api_key=os.environ["SONIOX_API_KEY"]) llm = OpenAILLMService( api_key=os.environ["OPENAI_API_KEY"], @@ -103,9 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Soniox STT settings: language=es") + logger.info("Updating Soniox STT settings: language_hints=[es]") await task.queue_frame( - STTUpdateSettingsFrame(delta=SonioxSTTService.Settings(language=Language.ES)) + STTUpdateSettingsFrame(delta=SonioxSTTService.Settings(language_hints=[Language.ES])) ) @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 823517aa1..49e44c8b3 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -155,7 +155,6 @@ def language_to_soniox_language(language: Language) -> str: Language.ID: "id", Language.IT: "it", Language.JA: "ja", - Language.KA: "ka", Language.KK: "kk", Language.KN: "kn", Language.KO: "ko", @@ -232,6 +231,7 @@ class SonioxSTTSettings(STTSettings): context_version 2. enable_speaker_diarization: Whether to enable speaker diarization. enable_language_identification: Whether to enable language identification. + max_endpoint_delay_ms: Max ms before endpoint detection finalizes the turn (500-3000). client_reference_id: Client reference ID to use for transcription. """ @@ -242,6 +242,7 @@ class SonioxSTTSettings(STTSettings): enable_language_identification: bool | None | _NotGiven = field( default_factory=lambda: NOT_GIVEN ) + max_endpoint_delay_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) client_reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -309,6 +310,7 @@ class SonioxSTTService(WebsocketSTTService): context=None, enable_speaker_diarization=False, enable_language_identification=False, + max_endpoint_delay_ms=None, client_reference_id=None, ) @@ -390,8 +392,7 @@ class SonioxSTTService(WebsocketSTTService): changed = await super()._update_settings(delta) if changed: - await self._disconnect() - await self._connect() + await self._request_reconnect() return changed @@ -522,6 +523,7 @@ class SonioxSTTService(WebsocketSTTService): "audio_format": self._audio_format, "num_channels": self._num_channels, "enable_endpoint_detection": enable_endpoint_detection, + "max_endpoint_delay_ms": s.max_endpoint_delay_ms, "sample_rate": self.sample_rate, "language_hints": _prepare_language_hints(assert_given(s.language_hints)), "language_hints_strict": s.language_hints_strict,