From 8a4ab611bebc81091aedee94a8218de92083b092 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 11 Feb 2026 10:50:11 -0500 Subject: [PATCH 001/189] Broad service settings refactor, with the primary aim of making service settings discoverable and strongly-typed. Service settings can be updated at runtime with `*UpdateSettingsFrame`s. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Does not (yet) touch `InputParams`, to avoid scope creep and touching something currently part of the public API. But there is a lot of overlap between `*Settings` object fields and `InputParams` fields. Other than discoverability/typing, these are some other improvements brought by this refactor: - There is now a single code path (see `_update_settings_from_typed`) where services can respond to settings changes (by, say, reconnecting if needed), improving maintainability and guaranteeing one and only one reconnection no matter which settings changed - `set_language`/`set_model`/`set_voice`—which we're assuming are usable as public methods, though *not* recommended over `*UpdateSettingsFrame`—all use the same code path as settings updates. They're also now all consistent in that, if a service needs to respond to a change (by, say, reconnecting if needed), any of these methods will kick off that process. Note that this is technically a behavior change. 
- Several services now properly react to changed settings by reconnecting: - `AWSTranscribeSTTService` - `AzureSTTService` - `SonioxSTTService` - `GladiaSTTService` - `AssemblyAISTTService` - `CartesiaSTTService` - `FishAudioTTSService` (would previously only reconnect when `model` changed) - `GoogleSTTService` - `SpeechmaticsSTTService` (which previously only handled *some* settings updates through a nonstandard public `update_params` method) - `GradiumSTTService` - `NvidiaSegmentedSTTService` (which previously only handled changes to language) - Bookkeeping across various services has been reduced, mostly by deduping ivars; the `self._settings` ivar is treated as the source of truth. NOTE: I pretty much guarantee that this PR misses some services that still need to be made consistent with how updates are handled (like whether changes in certain fields trigger reconnects when they need to). We can squash remaining inconsistencies as we stumble onto them, service by service. The goal here is to get things *mostly* in order, and establish the infrastructure and patterns we'll need going forward. 
--- .claude/skills/cleanup/SKILL.md | 2 +- .../35-pattern-pair-voice-switching.py | 2 +- src/pipecat/frames/frames.py | 11 +- src/pipecat/services/ai_service.py | 41 ++- src/pipecat/services/anthropic/llm.py | 77 +++-- src/pipecat/services/assemblyai/stt.py | 60 +++- src/pipecat/services/asyncai/tts.py | 67 ++-- src/pipecat/services/aws/llm.py | 56 ++-- src/pipecat/services/aws/stt.py | 73 +++-- src/pipecat/services/aws/tts.py | 60 ++-- src/pipecat/services/azure/stt.py | 52 ++- src/pipecat/services/azure/tts.py | 87 +++-- src/pipecat/services/camb/tts.py | 33 +- src/pipecat/services/cartesia/stt.py | 41 ++- src/pipecat/services/cartesia/tts.py | 153 +++++---- src/pipecat/services/cerebras/llm.py | 10 +- src/pipecat/services/deepgram/stt.py | 85 +++-- .../services/deepgram/stt_sagemaker.py | 83 +++-- src/pipecat/services/deepgram/tts.py | 37 ++- src/pipecat/services/deepseek/llm.py | 12 +- src/pipecat/services/elevenlabs/stt.py | 203 +++++++----- src/pipecat/services/elevenlabs/tts.py | 272 +++++++++++----- src/pipecat/services/fal/stt.py | 60 ++-- src/pipecat/services/fireworks/llm.py | 12 +- src/pipecat/services/fish/tts.py | 81 +++-- src/pipecat/services/gladia/stt.py | 86 +++-- .../services/google/gemini_live/llm.py | 117 ++++--- src/pipecat/services/google/llm.py | 52 +-- src/pipecat/services/google/stt.py | 241 +++++++++----- src/pipecat/services/google/tts.py | 201 +++++++----- src/pipecat/services/gradium/stt.py | 46 ++- src/pipecat/services/gradium/tts.py | 47 +-- src/pipecat/services/grok/realtime/llm.py | 65 ++-- src/pipecat/services/groq/tts.py | 33 +- src/pipecat/services/hathora/stt.py | 35 +- src/pipecat/services/hathora/tts.py | 38 ++- src/pipecat/services/hume/tts.py | 4 +- src/pipecat/services/inworld/tts.py | 120 ++++--- src/pipecat/services/kokoro/tts.py | 19 ++ src/pipecat/services/llm_service.py | 13 + src/pipecat/services/lmnt/tts.py | 29 +- src/pipecat/services/minimax/tts.py | 118 +++++-- src/pipecat/services/mistral/llm.py | 16 +- 
src/pipecat/services/neuphonic/tts.py | 61 ++-- src/pipecat/services/nvidia/stt.py | 128 ++++---- src/pipecat/services/nvidia/tts.py | 2 +- src/pipecat/services/openai/base_llm.py | 62 ++-- src/pipecat/services/openai/realtime/llm.py | 60 +++- src/pipecat/services/openai/stt.py | 51 ++- src/pipecat/services/openai/tts.py | 44 ++- .../services/openai_realtime_beta/openai.py | 51 ++- src/pipecat/services/perplexity/llm.py | 20 +- src/pipecat/services/playht/tts.py | 88 +++-- src/pipecat/services/resembleai/tts.py | 38 ++- src/pipecat/services/rime/tts.py | 279 +++++++++------- src/pipecat/services/sambanova/llm.py | 10 +- src/pipecat/services/sarvam/stt.py | 145 ++++++--- src/pipecat/services/sarvam/tts.py | 187 ++++++++--- src/pipecat/services/settings.py | 297 +++++++++++++++++ src/pipecat/services/soniox/stt.py | 76 ++++- src/pipecat/services/speechmatics/stt.py | 245 +++++++++++--- src/pipecat/services/speechmatics/tts.py | 2 +- src/pipecat/services/stt_service.py | 52 ++- src/pipecat/services/tts_service.py | 72 +++- src/pipecat/services/ultravox/llm.py | 24 +- src/pipecat/services/whisper/base_stt.py | 61 ++-- src/pipecat/services/whisper/stt.py | 79 +++-- src/pipecat/services/xtts/tts.py | 32 +- tests/test_settings.py | 308 ++++++++++++++++++ 69 files changed, 3943 insertions(+), 1481 deletions(-) create mode 100644 src/pipecat/services/settings.py create mode 100644 tests/test_settings.py diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md index f7dd6ea98..c0f4945b7 100644 --- a/.claude/skills/cleanup/SKILL.md +++ b/.claude/skills/cleanup/SKILL.md @@ -293,7 +293,7 @@ class NewTTSService(TTSService): """ super().__init__(**kwargs) self._api_key = api_key - self.set_voice(voice) + self._voice_id = voice ``` --- diff --git a/examples/foundational/35-pattern-pair-voice-switching.py b/examples/foundational/35-pattern-pair-voice-switching.py index 4b269ac3e..cacc04459 100644 --- a/examples/foundational/35-pattern-pair-voice-switching.py 
+++ b/examples/foundational/35-pattern-pair-voice-switching.py @@ -117,7 +117,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # First flush any existing audio to finish the current context await tts.flush_audio() # Then set the new voice - tts.set_voice(VOICE_IDS[voice_name]) + await tts.set_voice(VOICE_IDS[voice_name]) logger.info(f"Switched to {voice_name} voice") else: logger.warning(f"Unknown voice: {voice_name}") diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 5634d79ee..dd12929b9 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -42,6 +42,7 @@ from pipecat.utils.utils import obj_count, obj_id if TYPE_CHECKING: from pipecat.processors.aggregators.llm_context import LLMContext, NotGiven from pipecat.processors.frame_processor import FrameProcessor + from pipecat.services.settings import ServiceSettings class DeprecatedKeypadEntry: @@ -2112,13 +2113,17 @@ class TTSStoppedFrame(ControlFrame): class ServiceUpdateSettingsFrame(ControlFrame): """Base frame for updating service settings. - A control frame containing a request to update service settings. + Supports both the legacy ``settings`` dict and the new typed ``update`` + object. When both are provided, ``update`` takes precedence. Parameters: - settings: Dictionary of setting name to value mappings. + settings: Dictionary of setting name to value mappings (legacy). + update: Typed :class:`~pipecat.services.settings.ServiceSettings` + object describing the delta to apply. 
""" - settings: Mapping[str, Any] + settings: Mapping[str, Any] = field(default_factory=dict) + update: Optional["ServiceSettings"] = None @dataclass diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index c03ab9d0e..97b7b6443 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -10,7 +10,7 @@ Provides the foundation for all AI services in the Pipecat framework, including model management, settings handling, and frame processing lifecycle methods. """ -from typing import Any, AsyncGenerator, Dict, Mapping +from typing import Any, AsyncGenerator, Dict, Mapping, Set from loguru import logger @@ -23,6 +23,7 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import MetricsData from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.services.settings import ServiceSettings class AIService(FrameProcessor): @@ -42,7 +43,7 @@ class AIService(FrameProcessor): """ super().__init__(**kwargs) self._model_name: str = "" - self._settings: Dict[str, Any] = {} + self._settings: Dict[str, Any] | ServiceSettings = {} self._session_properties: Dict[str, Any] = {} @property @@ -135,6 +136,42 @@ class AIService(FrameProcessor): else: logger.warning(f"Unknown setting for {self.name} service: {key}") + async def _update_settings_from_typed(self, update: ServiceSettings) -> Set[str]: + """Apply a typed settings update and return the set of changed field names. + + If ``_settings`` is a :class:`ServiceSettings` object, the update is + applied to it and the changed-field set is returned. The ``model`` + field is handled specially: when it changes, ``set_model_name`` is + called. + + Services that have been migrated to typed settings should override + this method (calling ``super()``) to react to specific changed fields + (e.g. reconnect on voice change). + + Args: + update: A typed settings delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + if not isinstance(self._settings, ServiceSettings): + logger.warning( + f"{self.name}: received typed settings update but _settings " + f"is not a ServiceSettings — falling back to dict-based update" + ) + await self._update_settings(update.to_dict()) + return set() + + changed = self._settings.apply_update(update) + + if "model" in changed: + self.set_model_name(self._settings.model) + + if changed: + logger.info(f"{self.name}: updated settings fields: {changed}") + + return changed + async def process_frame(self, frame: Frame, direction: FrameDirection): """Process frames and handle service lifecycle. diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index a21296fe3..36ee104f5 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -16,8 +16,8 @@ import copy import io import json import re -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Union +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Literal, Optional, Union import httpx from loguru import logger @@ -42,7 +42,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, UserImageRawFrame, ) from pipecat.metrics.metrics import LLMTokenUsage @@ -59,6 +58,8 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN +from pipecat.services.settings import LLMSettings from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -69,6 +70,19 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AnthropicLLMSettings(LLMSettings): + """Typed settings for Anthropic LLM services. 
+ + Parameters: + enable_prompt_caching: Whether to enable prompt caching. + thinking: Extended thinking configuration. + """ + + enable_prompt_caching: Any = field(default_factory=lambda: _NOT_GIVEN) + thinking: Any = field(default_factory=lambda: _NOT_GIVEN) + + @dataclass class AnthropicContextAggregatorPair: """Pair of context aggregators for Anthropic conversations. @@ -210,9 +224,10 @@ class AnthropicLLMService(LLMService): self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = { - "max_tokens": params.max_tokens, - "enable_prompt_caching": ( + self._settings = AnthropicLLMSettings( + model=model, + max_tokens=params.max_tokens, + enable_prompt_caching=( params.enable_prompt_caching if params.enable_prompt_caching is not None else ( @@ -221,12 +236,12 @@ class AnthropicLLMService(LLMService): else False ) ), - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "thinking": params.thinking, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ) def can_generate_metrics(self) -> bool: """Check if this service can generate usage metrics. 
@@ -280,7 +295,7 @@ class AnthropicLLMService(LLMService): if isinstance(context, LLMContext): adapter: AnthropicLLMAdapter = self.get_llm_adapter() invocation_params = adapter.get_llm_invocation_params( - context, enable_prompt_caching=self._settings["enable_prompt_caching"] + context, enable_prompt_caching=self._settings.enable_prompt_caching ) messages = invocation_params["messages"] system = invocation_params["system"] @@ -294,20 +309,20 @@ class AnthropicLLMService(LLMService): # Build params using the same method as streaming completions params = { "model": self.model_name, - "max_tokens": max_tokens if max_tokens is not None else self._settings["max_tokens"], + "max_tokens": max_tokens if max_tokens is not None else self._settings.max_tokens, "stream": False, - "temperature": self._settings["temperature"], - "top_k": self._settings["top_k"], - "top_p": self._settings["top_p"], + "temperature": self._settings.temperature, + "top_k": self._settings.top_k, + "top_p": self._settings.top_p, "messages": messages, "system": system, "tools": tools, "betas": ["interleaved-thinking-2025-05-14"], } - if self._settings["thinking"]: - params["thinking"] = self._settings["thinking"].model_dump(exclude_unset=True) + if self._settings.thinking: + params["thinking"] = self._settings.thinking.model_dump(exclude_unset=True) - params.update(self._settings["extra"]) + params.update(self._settings.extra) # LLM completion response = await self._client.beta.messages.create(**params) @@ -358,14 +373,14 @@ class AnthropicLLMService(LLMService): if isinstance(context, LLMContext): adapter: AnthropicLLMAdapter = self.get_llm_adapter() params = adapter.get_llm_invocation_params( - context, enable_prompt_caching=self._settings["enable_prompt_caching"] + context, enable_prompt_caching=self._settings.enable_prompt_caching ) return params # Anthropic-specific context messages = ( context.get_messages_with_cache_control_markers() - if self._settings["enable_prompt_caching"] + if 
self._settings.enable_prompt_caching else context.messages ) return AnthropicLLMInvocationParams( @@ -408,21 +423,21 @@ class AnthropicLLMService(LLMService): params = { "model": self.model_name, - "max_tokens": self._settings["max_tokens"], + "max_tokens": self._settings.max_tokens, "stream": True, - "temperature": self._settings["temperature"], - "top_k": self._settings["top_k"], - "top_p": self._settings["top_p"], + "temperature": self._settings.temperature, + "top_k": self._settings.top_k, + "top_p": self._settings.top_p, } # Add thinking parameter if set - if self._settings["thinking"]: - params["thinking"] = self._settings["thinking"].model_dump(exclude_unset=True) + if self._settings.thinking: + params["thinking"] = self._settings.thinking.model_dump(exclude_unset=True) # Messages, system, tools params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) # "Interleaved thinking" needed to allow thinking between sequences # of function calls, when extended thinking is enabled. @@ -576,11 +591,9 @@ class AnthropicLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = AnthropicLLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) elif isinstance(frame, LLMEnablePromptCachingFrame): logger.debug(f"Setting enable prompt caching to: [{frame.enable}]") - self._settings["enable_prompt_caching"] = frame.enable + self._settings.enable_prompt_caching = frame.enable else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 41a0ae2a0..278873fdf 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -12,6 +12,7 @@ WebSocket API for streaming audio transcription. 
import asyncio import json +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Dict, Optional from urllib.parse import urlencode @@ -29,6 +30,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import ASSEMBLYAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -52,6 +54,19 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AssemblyAISTTSettings(STTSettings): + """Typed settings for the AssemblyAI STT service. + + See :class:`AssemblyAIConnectionParams` for detailed parameter descriptions. + + Parameters: + connection_params: Connection configuration parameters. + """ + + connection_params: AssemblyAIConnectionParams = field(default_factory=lambda: NOT_GIVEN) + + class AssemblyAISTTService(WebsocketSTTService): """AssemblyAI real-time speech-to-text service. @@ -96,9 +111,11 @@ class AssemblyAISTTService(WebsocketSTTService): ) self._api_key = api_key - self._language = language + self._settings: AssemblyAISTTSettings = AssemblyAISTTSettings( + language=language, + connection_params=connection_params, + ) self._api_endpoint_base_url = api_endpoint_base_url - self._connection_params = connection_params self._vad_force_turn_endpoint = vad_force_turn_endpoint self._termination_event = asyncio.Event() @@ -165,6 +182,35 @@ class AssemblyAISTTService(WebsocketSTTService): """ return True + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update and reconnect if anything changed. + + Any change triggers a WebSocket reconnect since all connection + parameters are encoded in the WebSocket URL. + + Args: + update: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. 
+ + Returns: + Set of field names whose values actually changed. + """ + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + # Re-apply manual turn mode config if vad_force_turn_endpoint is active + # and connection_params were updated. + if self._vad_force_turn_endpoint and "connection_params" in changed: + self._settings.connection_params = self._configure_manual_turn_mode( + self._settings.connection_params + ) + + await self._disconnect() + await self._connect() + + return changed + async def start(self, frame: StartFrame): """Start the speech-to-text service. @@ -239,7 +285,7 @@ class AssemblyAISTTService(WebsocketSTTService): def _build_ws_url(self) -> str: """Build WebSocket URL with query parameters using urllib.parse.urlencode.""" params = {} - for k, v in self._connection_params.model_dump().items(): + for k, v in self._settings.connection_params.model_dump().items(): if v is not None: if k == "keyterms_prompt": params[k] = json.dumps(v) @@ -415,18 +461,18 @@ class AssemblyAISTTService(WebsocketSTTService): if not message.transcript: return if message.end_of_turn and ( - not self._connection_params.formatted_finals or message.turn_is_formatted + not self._settings.connection_params.formatted_finals or message.turn_is_formatted ): await self.push_frame( TranscriptionFrame( message.transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, message, ) ) - await self._trace_transcription(message.transcript, True, self._language) + await self._trace_transcription(message.transcript, True, self._settings.language) await self.stop_processing_metrics() else: await self.push_frame( @@ -434,7 +480,7 @@ class AssemblyAISTTService(WebsocketSTTService): message.transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, message, ) ) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 4ff6c928d..aecf69a26 100644 --- 
a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -9,6 +9,7 @@ import asyncio import base64 import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -72,6 +74,21 @@ def language_to_async_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class AsyncAITTSSettings(TTSSettings): + """Typed settings for Async AI TTS services. + + Parameters: + output_container: Audio container format (e.g. "raw"). + output_encoding: Audio encoding format (e.g. "pcm_s16le"). + output_sample_rate: Audio sample rate in Hz. + """ + + output_container: str = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + + class AsyncAITTSService(AudioContextTTSService): """Async TTS service with WebSocket streaming. 
@@ -131,19 +148,21 @@ class AsyncAITTSService(AudioContextTTSService): self._api_key = api_key self._api_version = version self._url = url - self._settings = { - "output_format": { + self._settings: AsyncAITTSSettings = AsyncAITTSSettings( + model=model, + voice=voice_id, + output_format={ "container": container, "encoding": encoding, "sample_rate": 0, }, - "language": self.language_to_service_language(params.language) + language=self.language_to_service_language(params.language) if params.language else None, - } + ) self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id self._receive_task = None self._keepalive_task = None @@ -179,7 +198,7 @@ class AsyncAITTSService(AudioContextTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -235,8 +254,12 @@ class AsyncAITTSService(AudioContextTTSService): init_msg = { "model_id": self._model_name, "voice": {"mode": "id", "id": self._voice_id}, - "output_format": self._settings["output_format"], - "language": self._settings["language"], + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, + "language": self._settings.language, } await self._get_websocket().send(json.dumps(init_msg)) @@ -454,17 +477,17 @@ class AsyncAIHttpTTSService(TTSService): self._api_key = api_key self._base_url = url self._api_version = version - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) + self._settings: AsyncAITTSSettings = AsyncAITTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + 
output_sample_rate=0, + language=self.language_to_service_language(params.language) if params.language else None, - } - self.set_voice(voice_id) + ) + self._voice_id = voice_id self.set_model_name(model) self._session = aiohttp_session @@ -495,7 +518,7 @@ class AsyncAIHttpTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -517,8 +540,12 @@ class AsyncAIHttpTTSService(TTSService): "model_id": self._model_name, "transcript": text, "voice": voice_config, - "output_format": self._settings["output_format"], - "language": self._settings["language"], + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, + "language": self._settings.language, } yield TTSStartedFrame(context_id=context_id) headers = { diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 1778ae74e..032cee060 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -18,8 +18,8 @@ import io import json import os import re -from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Optional from loguru import logger from PIL import Image @@ -40,7 +40,6 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, LLMMessagesFrame, LLMTextFrame, - LLMUpdateSettingsFrame, UserImageRawFrame, ) from pipecat.metrics.metrics import LLMTokenUsage @@ -57,6 +56,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService +from 
pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -71,6 +71,19 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AWSBedrockLLMSettings(LLMSettings): + """Typed settings for AWS Bedrock LLM services. + + Parameters: + latency: Performance mode - "standard" or "optimized". + additional_model_request_fields: Additional model-specific parameters. + """ + + latency: Any = field(default_factory=lambda: NOT_GIVEN) + additional_model_request_fields: Any = field(default_factory=lambda: NOT_GIVEN) + + @dataclass class AWSBedrockContextAggregatorPair: """Container for AWS Bedrock context aggregators. @@ -806,15 +819,16 @@ class AWSBedrockLLMService(LLMService): self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = { - "max_tokens": params.max_tokens, - "temperature": params.temperature, - "top_p": params.top_p, - "latency": params.latency, - "additional_model_request_fields": params.additional_model_request_fields + self._settings = AWSBedrockLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_p=params.top_p, + latency=params.latency, + additional_model_request_fields=params.additional_model_request_fields if isinstance(params.additional_model_request_fields, dict) else {}, - } + ) logger.info(f"Using AWS Bedrock model: {model}") @@ -836,12 +850,12 @@ class AWSBedrockLLMService(LLMService): Dictionary containing only the inference parameters that are not None. 
""" inference_config = {} - if self._settings["max_tokens"] is not None: - inference_config["maxTokens"] = self._settings["max_tokens"] - if self._settings["temperature"] is not None: - inference_config["temperature"] = self._settings["temperature"] - if self._settings["top_p"] is not None: - inference_config["topP"] = self._settings["top_p"] + if self._settings.max_tokens is not None: + inference_config["maxTokens"] = self._settings.max_tokens + if self._settings.temperature is not None: + inference_config["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + inference_config["topP"] = self._settings.top_p return inference_config async def run_inference( @@ -879,7 +893,7 @@ class AWSBedrockLLMService(LLMService): request_params = { "modelId": self.model_name, "messages": messages, - "additionalModelRequestFields": self._settings["additional_model_request_fields"], + "additionalModelRequestFields": self._settings.additional_model_request_fields, } if inference_config: @@ -1036,7 +1050,7 @@ class AWSBedrockLLMService(LLMService): request_params = { "modelId": self.model_name, "messages": messages, - "additionalModelRequestFields": self._settings["additional_model_request_fields"], + "additionalModelRequestFields": self._settings.additional_model_request_fields, } # Only add inference config if it has parameters @@ -1081,8 +1095,8 @@ class AWSBedrockLLMService(LLMService): request_params["toolConfig"] = tool_config # Add performance config if latency is specified - if self._settings["latency"] in ["standard", "optimized"]: - request_params["performanceConfig"] = {"latency": self._settings["latency"]} + if self._settings.latency in ["standard", "optimized"]: + request_params["performanceConfig"] = {"latency": self._settings.latency} # Log request params with messages redacted for logging if isinstance(context, LLMContext): @@ -1207,8 +1221,6 @@ class AWSBedrockLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't 
support the newer universal # LLMContext with it context = AWSBedrockLLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index f78bc4d4b..cb52da12a 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -14,6 +14,7 @@ import json import os import random import string +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -28,6 +29,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.aws.utils import build_event_message, decode_event, get_presigned_url +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import AWS_TRANSCRIBE_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -43,6 +45,25 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AWSTranscribeSTTSettings(STTSettings): + """Typed settings for the AWS Transcribe STT service. + + Parameters: + sample_rate: Audio sample rate in Hz (8000 or 16000). + media_encoding: Audio encoding format (e.g. "linear16"). + number_of_channels: Number of audio channels. + show_speaker_label: Whether to show speaker labels. + enable_channel_identification: Whether to enable channel identification. 
+ """ + + sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + media_encoding: str = field(default_factory=lambda: NOT_GIVEN) + number_of_channels: int = field(default_factory=lambda: NOT_GIVEN) + show_speaker_label: bool = field(default_factory=lambda: NOT_GIVEN) + enable_channel_identification: bool = field(default_factory=lambda: NOT_GIVEN) + + class AWSTranscribeSTTService(WebsocketSTTService): """AWS Transcribe Speech-to-Text service using WebSocket streaming. @@ -78,21 +99,21 @@ class AWSTranscribeSTTService(WebsocketSTTService): """ super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self._settings = { - "sample_rate": sample_rate, - "language": language, - "media_encoding": "linear16", # AWS expects raw PCM - "number_of_channels": 1, - "show_speaker_label": False, - "enable_channel_identification": False, - } + self._settings: AWSTranscribeSTTSettings = AWSTranscribeSTTSettings( + language=language, + sample_rate=sample_rate, + media_encoding="linear16", + number_of_channels=1, + show_speaker_label=False, + enable_channel_identification=False, + ) # Validate sample rate - AWS Transcribe only supports 8000 Hz or 16000 Hz if sample_rate not in [8000, 16000]: logger.warning( f"AWS Transcribe only supports 8000 Hz or 16000 Hz sample rates. Converting from {sample_rate} Hz to 16000 Hz." ) - self._settings["sample_rate"] = 16000 + self._settings.sample_rate = 16000 self._credentials = { "aws_access_key_id": aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), @@ -117,6 +138,20 @@ class AWSTranscribeSTTService(WebsocketSTTService): } return encoding_map.get(encoding, encoding) + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, reconnecting if needed. + + Any change to connection-relevant settings (model, language, etc.) + triggers a WebSocket reconnect so the new configuration takes effect. 
+ """ + changed = await super()._update_settings_from_typed(update) + + if changed and self._websocket: + await self._disconnect() + await self._connect() + + return changed + async def start(self, frame: StartFrame): """Initialize the connection when the service starts. @@ -208,9 +243,9 @@ class AWSTranscribeSTTService(WebsocketSTTService): logger.debug("Connecting to AWS Transcribe WebSocket") - language_code = self.language_to_service_language(Language(self._settings["language"])) + language_code = self.language_to_service_language(Language(self._settings.language)) if not language_code: - raise ValueError(f"Unsupported language: {self._settings['language']}") + raise ValueError(f"Unsupported language: {self._settings.language}") # Generate random websocket key websocket_key = "".join( @@ -237,14 +272,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): }, language_code=language_code, media_encoding=self.get_service_encoding( - self._settings["media_encoding"] + self._settings.media_encoding ), # Convert to AWS format - sample_rate=self._settings["sample_rate"], - number_of_channels=self._settings["number_of_channels"], + sample_rate=self._settings.sample_rate, + number_of_channels=self._settings.number_of_channels, enable_partial_results_stabilization=True, partial_results_stability="high", - show_speaker_label=self._settings["show_speaker_label"], - enable_channel_identification=self._settings["enable_channel_identification"], + show_speaker_label=self._settings.show_speaker_label, + enable_channel_identification=self._settings.enable_channel_identification, ) logger.debug(f"{self} Connecting to WebSocket with URL: {presigned_url[:100]}...") @@ -479,14 +514,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, result=result, ) ) await self._handle_transcription( transcript, is_final, - self._settings["language"], + self._settings.language, ) await 
self.stop_processing_metrics() else: @@ -495,7 +530,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, result=result, ) ) diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index b902564d2..5086b1469 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -11,6 +11,7 @@ supporting multiple languages, voices, and SSML features. """ import os +from dataclasses import dataclass, field from typing import AsyncGenerator, List, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -121,6 +123,25 @@ def language_to_aws_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class AWSPollyTTSSettings(TTSSettings): + """Typed settings for AWS Polly TTS service. + + Parameters: + engine: TTS engine to use ('standard', 'neural', etc.). + pitch: Voice pitch adjustment (for standard engine only). + rate: Speech rate adjustment. + volume: Voice volume adjustment. + lexicon_names: List of pronunciation lexicons to apply. + """ + + engine: str = field(default_factory=lambda: NOT_GIVEN) + pitch: str = field(default_factory=lambda: NOT_GIVEN) + rate: str = field(default_factory=lambda: NOT_GIVEN) + volume: str = field(default_factory=lambda: NOT_GIVEN) + lexicon_names: List[str] = field(default_factory=lambda: NOT_GIVEN) + + class AWSPollyTTSService(TTSService): """AWS Polly text-to-speech service. 
@@ -185,20 +206,21 @@ class AWSPollyTTSService(TTSService): } self._aws_session = aioboto3.Session() - self._settings = { - "engine": params.engine, - "language": self.language_to_service_language(params.language) + self._settings: AWSPollyTTSSettings = AWSPollyTTSSettings( + voice=voice_id, + engine=params.engine, + language=self.language_to_service_language(params.language) if params.language else "en-US", - "pitch": params.pitch, - "rate": params.rate, - "volume": params.volume, - "lexicon_names": params.lexicon_names, - } + pitch=params.pitch, + rate=params.rate, + volume=params.volume, + lexicon_names=params.lexicon_names, + ) self._resampler = create_stream_resampler() - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -222,19 +244,19 @@ class AWSPollyTTSService(TTSService): def _construct_ssml(self, text: str) -> str: ssml = "" - language = self._settings["language"] + language = self._settings.language ssml += f"" prosody_attrs = [] # Prosody tags are only supported for standard and neural engines - if self._settings["engine"] == "standard": - if self._settings["pitch"]: - prosody_attrs.append(f"pitch='{self._settings['pitch']}'") + if self._settings.engine == "standard": + if self._settings.pitch: + prosody_attrs.append(f"pitch='{self._settings.pitch}'") - if self._settings["rate"]: - prosody_attrs.append(f"rate='{self._settings['rate']}'") - if self._settings["volume"]: - prosody_attrs.append(f"volume='{self._settings['volume']}'") + if self._settings.rate: + prosody_attrs.append(f"rate='{self._settings.rate}'") + if self._settings.volume: + prosody_attrs.append(f"volume='{self._settings.volume}'") if prosody_attrs: ssml += f"" @@ -276,10 +298,10 @@ class AWSPollyTTSService(TTSService): "TextType": "ssml", "OutputFormat": "pcm", "VoiceId": self._voice_id, - "Engine": self._settings["engine"], + "Engine": self._settings.engine, # AWS only supports 8000 
and 16000 for PCM. We select 16000. "SampleRate": "16000", - "LexiconNames": self._settings["lexicon_names"], + "LexiconNames": self._settings.lexicon_names, } # Filter out None values diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 1bc7ec70a..bf3f70653 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -11,6 +11,7 @@ Speech SDK for real-time audio transcription. """ import asyncio +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -25,6 +26,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.azure.common import language_to_azure_language +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import AZURE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -48,6 +50,19 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AzureSTTSettings(STTSettings): + """Typed settings for the Azure STT service. + + Parameters: + region: Azure region for the Speech service. + sample_rate: Audio sample rate in Hz. + """ + + region: str = field(default_factory=lambda: NOT_GIVEN) + sample_rate: Optional[int] = field(default_factory=lambda: NOT_GIVEN) + + class AzureSTTService(STTService): """Azure Speech-to-Text service for real-time audio transcription. @@ -92,11 +107,11 @@ class AzureSTTService(STTService): self._audio_stream = None self._speech_recognizer = None - self._settings = { - "region": region, - "language": language_to_azure_language(language), - "sample_rate": sample_rate, - } + self._settings: AzureSTTSettings = AzureSTTSettings( + region=region, + language=language_to_azure_language(language), + sample_rate=sample_rate, + ) def can_generate_metrics(self) -> bool: """Check if this service can generate performance metrics. 
@@ -106,6 +121,29 @@ class AzureSTTService(STTService): """ return True + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, reconfiguring the recognizer if needed. + + When ``language`` changes the ``SpeechConfig`` is updated and the + speech recognizer is restarted so that the new language takes effect. + """ + changed = await super()._update_settings_from_typed(update) + + if "language" in changed: + # Convert Language enum to Azure language code if needed. + lang = self._settings.language + if isinstance(lang, Language): + lang = language_to_azure_language(lang) + self._settings.language = lang + self._speech_config.speech_recognition_language = lang + + # Restart the recognizer with the new config. + if self._speech_recognizer: + self._speech_recognizer.stop_continuous_recognition_async() + self._speech_recognizer.start_continuous_recognition_async() + + return changed + async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: """Process audio data for speech-to-text conversion. 
@@ -198,7 +236,7 @@ class AzureSTTService(STTService): def _on_handle_recognized(self, event): if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0: - language = getattr(event.result, "language", None) or self._settings.get("language") + language = getattr(event.result, "language", None) or self._settings.language frame = TranscriptionFrame( event.result.text, self._user_id, @@ -213,7 +251,7 @@ class AzureSTTService(STTService): def _on_handle_recognizing(self, event): if event.result.reason == ResultReason.RecognizingSpeech and len(event.result.text) > 0: - language = getattr(event.result, "language", None) or self._settings.get("language") + language = getattr(event.result, "language", None) or self._settings.language frame = InterimTranscriptionFrame( event.result.text, self._user_id, diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 7d4aa0253..04b51d10b 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -7,6 +7,7 @@ """Azure Cognitive Services Text-to-Speech service implementations.""" import asyncio +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -25,6 +26,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.azure.common import language_to_azure_language +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService, WordTTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -65,6 +67,31 @@ def sample_rate_to_output_format(sample_rate: int) -> SpeechSynthesisOutputForma return sample_rate_map.get(sample_rate, SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm) +@dataclass +class AzureTTSSettings(TTSSettings): + """Typed settings for Azure TTS services. 
+ + Parameters: + emphasis: Emphasis level for speech ("strong", "moderate", "reduced"). + language: Language for synthesis. Defaults to English (US). + pitch: Voice pitch adjustment (e.g., "+10%", "-5Hz", "high"). + rate: Speech rate adjustment (e.g., "1.0", "1.25", "slow", "fast"). + role: Voice role for expression (e.g., "YoungAdultFemale"). + style: Speaking style (e.g., "cheerful", "sad", "excited"). + style_degree: Intensity of the speaking style (0.01 to 2.0). + volume: Volume level (e.g., "+20%", "loud", "x-soft"). + """ + + emphasis: str = field(default_factory=lambda: NOT_GIVEN) + language: str = field(default_factory=lambda: NOT_GIVEN) + pitch: str = field(default_factory=lambda: NOT_GIVEN) + rate: str = field(default_factory=lambda: NOT_GIVEN) + role: str = field(default_factory=lambda: NOT_GIVEN) + style: str = field(default_factory=lambda: NOT_GIVEN) + style_degree: str = field(default_factory=lambda: NOT_GIVEN) + volume: str = field(default_factory=lambda: NOT_GIVEN) + + class AzureBaseTTSService: """Base mixin class for Azure Cognitive Services text-to-speech implementations. 
@@ -126,18 +153,18 @@ class AzureBaseTTSService: """ params = params or AzureBaseTTSService.InputParams() - self._settings = { - "emphasis": params.emphasis, - "language": self.language_to_service_language(params.language) + self._settings: AzureTTSSettings = AzureTTSSettings( + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) if params.language else "en-US", - "pitch": params.pitch, - "rate": params.rate, - "role": params.role, - "style": params.style, - "style_degree": params.style_degree, - "volume": params.volume, - } + pitch=params.pitch, + rate=params.rate, + role=params.role, + style=params.style, + style_degree=params.style_degree, + volume=params.volume, + ) self._api_key = api_key self._region = region @@ -156,7 +183,7 @@ class AzureBaseTTSService: return language_to_azure_language(language) def _construct_ssml(self, text: str) -> str: - language = self._settings["language"] + language = self._settings.language # Escape special characters escaped_text = self._escape_text(text) @@ -169,38 +196,38 @@ class AzureBaseTTSService: "" ) - if self._settings["style"]: - ssml += f"" - if self._settings["emphasis"]: - ssml += f"" + if self._settings.emphasis: + ssml += f"" ssml += escaped_text - if self._settings["emphasis"]: + if self._settings.emphasis: ssml += "" if prosody_attrs: ssml += "" - if self._settings["style"]: + if self._settings.style: ssml += "" ssml += "" @@ -314,7 +341,7 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): subscription=self._api_key, region=self._region, ) - self._speech_config.speech_synthesis_language = self._settings["language"] + self._speech_config.speech_synthesis_language = self._settings.language self._speech_config.set_speech_synthesis_output_format( sample_rate_to_output_format(self.sample_rate) ) @@ -364,7 +391,7 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): Returns: True if the language is CJK, False otherwise. 
""" - language = self._settings.get("language", "").lower() + language = (self._settings.language if self._settings.language else "").lower() # Check if language starts with CJK language codes return language.startswith(("zh", "ja", "ko", "cmn", "yue", "wuu")) @@ -735,7 +762,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): subscription=self._api_key, region=self._region, ) - self._speech_config.speech_synthesis_language = self._settings["language"] + self._speech_config.speech_synthesis_language = self._settings.language self._speech_config.set_speech_synthesis_output_format( sample_rate_to_output_format(self.sample_rate) ) diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index def57d3a0..8a6f67231 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -16,7 +16,8 @@ Features: - Model-specific sample rates: mars-pro (48kHz), mars-flash (22.05kHz) """ -from typing import Any, AsyncGenerator, Dict, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Dict, Optional from camb import StreamTtsOutputConfiguration from camb.client import AsyncCambAI @@ -31,6 +32,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -133,6 +135,18 @@ def _get_aligned_audio(buffer: bytes) -> tuple[bytes, bytes]: return buffer[:aligned_size], buffer[aligned_size:] +@dataclass +class CambTTSSettings(TTSSettings): + """Typed settings for Camb.ai TTS service. + + Parameters: + user_instructions: Custom instructions for mars-instruct model only. + Ignored for other models. Max 1000 characters. 
+ """ + + user_instructions: str = field(default_factory=lambda: NOT_GIVEN) + + class CambTTSService(TTSService): """Camb.ai MARS text-to-speech service using the official SDK. @@ -212,15 +226,16 @@ class CambTTSService(TTSService): ) # Build settings - self._settings = { - "language": ( + self._settings: CambTTSSettings = CambTTSSettings( + model=model, + voice=voice_id, + language=( self.language_to_service_language(params.language) if params.language else "en-us" ), - "user_instructions": params.user_instructions, - } + user_instructions=params.user_instructions, + ) self.set_model_name(model) - self.set_voice(str(voice_id)) self._voice_id = voice_id self._client = None @@ -283,14 +298,14 @@ class CambTTSService(TTSService): tts_kwargs: Dict[str, Any] = { "text": text, "voice_id": self._voice_id, - "language": self._settings["language"], + "language": self._settings.language, "speech_model": self.model_name, "output_configuration": StreamTtsOutputConfiguration(format="pcm_s16le"), } # Add user instructions if using mars-instruct model - if self._model_name == "mars-instruct" and self._settings.get("user_instructions"): - tts_kwargs["user_instructions"] = self._settings["user_instructions"] + if self._model_name == "mars-instruct" and self._settings.user_instructions: + tts_kwargs["user_instructions"] = self._settings.user_instructions await self.start_tts_usage_metrics(text) yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index c4429226f..624801bfb 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -12,6 +12,7 @@ the Cartesia Live transcription API for real-time speech recognition. 
import json import urllib.parse +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import CARTESIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -42,6 +44,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class CartesiaSTTSettings(STTSettings): + """Typed settings for the Cartesia STT service. + + Parameters: + encoding: Audio encoding format (e.g. ``"pcm_s16le"``). + """ + + encoding: str = field(default_factory=lambda: NOT_GIVEN) + + class CartesiaLiveOptions: """Configuration options for Cartesia Live STT service. @@ -181,7 +194,11 @@ class CartesiaSTTService(WebsocketSTTService): k: v for k, v in merged_options.items() if not isinstance(v, str) or v != "None" } - self._settings = merged_options + self._settings: CartesiaSTTSettings = CartesiaSTTSettings( + model=merged_options["model"], + language=merged_options.get("language"), + encoding=merged_options.get("encoding", "pcm_s16le"), + ) self.set_model_name(merged_options["model"]) self._api_key = api_key self._base_url = base_url or "api.cartesia.ai" @@ -275,13 +292,33 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update and reconnect if anything changed. + + Args: + update: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + changed = await super()._update_settings_from_typed(update) + if changed: + await self._disconnect() + await self._connect() + return changed + async def _connect_websocket(self): try: if self._websocket and self._websocket.state is State.OPEN: return logger.debug("Connecting to Cartesia STT") - params = self._settings + params = { + "model": self._settings.model, + "language": self._settings.language, + "encoding": self._settings.encoding, + "sample_rate": str(self.sample_rate), + } ws_url = f"wss://{self._base_url}/stt/websocket?{urllib.parse.urlencode(params)}" headers = {"Cartesia-Version": "2025-04-16", "X-API-Key": self._api_key} diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 791c60a18..531aafdf7 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -9,8 +9,9 @@ import base64 import json import warnings +from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, List, Literal, Optional +from typing import Any, AsyncGenerator, List, Literal, Optional from loguru import logger from pydantic import BaseModel, Field @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import AudioContextWordTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -191,6 +193,31 @@ class CartesiaEmotion(str, Enum): DETERMINED = "determined" +@dataclass +class CartesiaTTSSettings(TTSSettings): + """Typed settings for Cartesia TTS services. + + Parameters: + output_container: Audio container format (e.g. "raw"). + output_encoding: Audio encoding format (e.g. "pcm_s16le"). + output_sample_rate: Audio sample rate in Hz. 
+ speed: Voice speed control for non-Sonic-3 models (literal values). + emotion: List of emotion controls for non-Sonic-3 models. + generation_config: Generation configuration for Sonic-3 models. Includes volume, + speed (numeric), and emotion (string) parameters. + pronunciation_dict_id: The ID of the pronunciation dictionary to use for + custom pronunciations. + """ + + output_container: str = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + speed: str = field(default_factory=lambda: NOT_GIVEN) + emotion: List[str] = field(default_factory=lambda: NOT_GIVEN) + generation_config: GenerationConfig = field(default_factory=lambda: NOT_GIVEN) + pronunciation_dict_id: str = field(default_factory=lambda: NOT_GIVEN) + + class CartesiaTTSService(AudioContextWordTTSService): """Cartesia TTS service with WebSocket streaming and word timestamps. @@ -289,22 +316,20 @@ class CartesiaTTSService(AudioContextWordTTSService): self._api_key = api_key self._cartesia_version = cartesia_version self._url = url - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) + self._settings: CartesiaTTSSettings = CartesiaTTSSettings( + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) if params.language else None, - "speed": params.speed, - "emotion": params.emotion, - "generation_config": params.generation_config, - "pronunciation_dict_id": params.pronunciation_dict_id, - } + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + ) self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id self._context_id = None self._receive_task = None 
@@ -317,16 +342,6 @@ class CartesiaTTSService(AudioContextWordTTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model. - - Args: - model: The model name to use for synthesis. - """ - self._model_id = model - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Cartesia language format. @@ -391,7 +406,7 @@ class CartesiaTTSService(AudioContextWordTTSService): Returns: List of (word, start_time) tuples processed for the language. """ - current_language = self._settings.get("language") + current_language = self._settings.language # Check if this is a CJK language (if language is None, treat as non-CJK) if current_language and self._is_cjk_language(current_language): @@ -414,7 +429,7 @@ class CartesiaTTSService(AudioContextWordTTSService): voice_config["mode"] = "id" voice_config["id"] = self._voice_id - if self._settings["emotion"]: + if is_given(self._settings.emotion) and self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -423,8 +438,7 @@ class CartesiaTTSService(AudioContextWordTTSService): stacklevel=2, ) voice_config["__experimental_controls"] = {} - if self._settings["emotion"]: - voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"] + voice_config["__experimental_controls"]["emotion"] = self._settings.emotion msg = { "transcript": text, @@ -432,24 +446,28 @@ class CartesiaTTSService(AudioContextWordTTSService): "context_id": self._context_id, "model_id": self.model_name, "voice": voice_config, - "output_format": self._settings["output_format"], + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, "add_timestamps": add_timestamps, "use_original_timestamps": False if self.model_name == "sonic" 
else True, } - if self._settings["language"]: - msg["language"] = self._settings["language"] + if is_given(self._settings.language) and self._settings.language: + msg["language"] = self._settings.language - if self._settings["speed"]: - msg["speed"] = self._settings["speed"] + if is_given(self._settings.speed) and self._settings.speed: + msg["speed"] = self._settings.speed - if self._settings["generation_config"]: - msg["generation_config"] = self._settings["generation_config"].model_dump( + if is_given(self._settings.generation_config) and self._settings.generation_config: + msg["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if self._settings["pronunciation_dict_id"]: - msg["pronunciation_dict_id"] = self._settings["pronunciation_dict_id"] + if is_given(self._settings.pronunciation_dict_id) and self._settings.pronunciation_dict_id: + msg["pronunciation_dict_id"] = self._settings.pronunciation_dict_id return json.dumps(msg) @@ -460,7 +478,7 @@ class CartesiaTTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -694,21 +712,21 @@ class CartesiaHttpTTSService(TTSService): self._api_key = api_key self._base_url = base_url self._cartesia_version = cartesia_version - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) + self._settings: CartesiaTTSSettings = CartesiaTTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) if params.language else None, - "speed": params.speed, - "emotion": params.emotion, - "generation_config": params.generation_config, - "pronunciation_dict_id": params.pronunciation_dict_id, - } - self.set_voice(voice_id) + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + ) + self._voice_id = voice_id self.set_model_name(model) self._client = AsyncCartesia( @@ -742,7 +760,7 @@ class CartesiaHttpTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate async def stop(self, frame: EndFrame): """Stop the Cartesia HTTP TTS service. 
@@ -778,7 +796,7 @@ class CartesiaHttpTTSService(TTSService): try: voice_config = {"mode": "id", "id": self._voice_id} - if self._settings["emotion"]: + if is_given(self._settings.emotion) and self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -786,30 +804,39 @@ class CartesiaHttpTTSService(TTSService): DeprecationWarning, stacklevel=2, ) - voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]} + voice_config["__experimental_controls"] = {"emotion": self._settings.emotion} await self.start_ttfb_metrics() + output_format = { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + } + payload = { "model_id": self._model_name, "transcript": text, "voice": voice_config, - "output_format": self._settings["output_format"], + "output_format": output_format, } - if self._settings["language"]: - payload["language"] = self._settings["language"] + if is_given(self._settings.language) and self._settings.language: + payload["language"] = self._settings.language - if self._settings["speed"]: - payload["speed"] = self._settings["speed"] + if is_given(self._settings.speed) and self._settings.speed: + payload["speed"] = self._settings.speed - if self._settings["generation_config"]: - payload["generation_config"] = self._settings["generation_config"].model_dump( + if is_given(self._settings.generation_config) and self._settings.generation_config: + payload["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if self._settings["pronunciation_dict_id"]: - payload["pronunciation_dict_id"] = self._settings["pronunciation_dict_id"] + if ( + is_given(self._settings.pronunciation_dict_id) + and self._settings.pronunciation_dict_id + ): + payload["pronunciation_dict_id"] = self._settings.pronunciation_dict_id yield TTSStartedFrame(context_id=context_id) diff --git 
a/src/pipecat/services/cerebras/llm.py b/src/pipecat/services/cerebras/llm.py index 54ea45ddb..01a8165f8 100644 --- a/src/pipecat/services/cerebras/llm.py +++ b/src/pipecat/services/cerebras/llm.py @@ -68,14 +68,14 @@ class CerebrasLLMService(OpenAILLMService): params = { "model": self.model_name, "stream": True, - "seed": self._settings["seed"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_completion_tokens": self._settings["max_completion_tokens"], + "seed": self._settings.seed, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_completion_tokens": self._settings.max_completion_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 0f79499ba..91d4308cb 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -6,6 +6,7 @@ """Deepgram speech-to-text service implementation.""" +from dataclasses import dataclass, field from typing import AsyncGenerator, Dict, Optional from loguru import logger @@ -23,6 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import DEEPGRAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -45,6 +47,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class DeepgramSTTSettings(STTSettings): + """Typed settings for the Deepgram STT service. + + Parameters: + live_options: Deepgram ``LiveOptions`` for detailed configuration. 
+ """ + + live_options: LiveOptions = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramSTTService(STTService): """Deepgram speech-to-text service. @@ -129,11 +142,17 @@ class DeepgramSTTService(STTService): merged_options["language"] = merged_options["language"].value self.set_model_name(merged_options["model"]) - self._settings = merged_options + merged_live_options = LiveOptions(**merged_options) + self._settings: DeepgramSTTSettings = DeepgramSTTSettings( + model=merged_options.get("model"), + language=merged_options.get("language"), + live_options=merged_live_options, + ) + self._addons = addons self._should_interrupt = should_interrupt - if merged_options.get("vad_events"): + if merged_live_options.vad_events: import warnings with warnings.catch_warnings(): @@ -164,7 +183,7 @@ class DeepgramSTTService(STTService): Returns: True if VAD events are enabled in the current settings. """ - return self._settings["vad_events"] + return self._settings.live_options.vad_events def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -174,28 +193,48 @@ class DeepgramSTTService(STTService): """ return True - async def set_model(self, model: str): - """Set the Deepgram model and reconnect. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, keeping ``live_options`` in sync. - Args: - model: The Deepgram model name to use. + Top-level ``model`` and ``language`` are the source of truth. When + they are given in *update* their values are propagated into + ``live_options``. When only ``live_options`` is given, its ``model`` + and ``language`` are propagated *up* to the top-level fields. + + Any change triggers a WebSocket reconnect. """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._settings["model"] = model + # Determine which top-level fields are explicitly provided. 
+ model_given = isinstance(update, DeepgramSTTSettings) and is_given( + getattr(update, "model", NOT_GIVEN) + ) + language_given = isinstance(update, DeepgramSTTSettings) and is_given( + getattr(update, "language", NOT_GIVEN) + ) + + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + # --- Sync model -------------------------------------------------- + if model_given: + # Top-level model wins → push into live_options. + self._settings.live_options.model = self._settings.model + elif "live_options" in changed and self._settings.live_options.model is not None: + # Only live_options was given → pull model up. + self._settings.model = self._settings.live_options.model + self.set_model_name(self._settings.model) + + # --- Sync language ----------------------------------------------- + if language_given: + self._settings.live_options.language = self._settings.language + elif "live_options" in changed and self._settings.live_options.language is not None: + self._settings.language = self._settings.live_options.language + await self._disconnect() await self._connect() - async def set_language(self, language: Language): - """Set the recognition language and reconnect. - - Args: - language: The language to use for speech recognition. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - await self._disconnect() - await self._connect() + return changed async def start(self, frame: StartFrame): """Start the Deepgram STT service. @@ -204,7 +243,7 @@ class DeepgramSTTService(STTService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.live_options.sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -257,7 +296,9 @@ class DeepgramSTTService(STTService): self._on_utterance_end, ) - if not await self._connection.start(options=self._settings, addons=self._addons): + if not await self._connection.start( + options=self._settings.live_options, addons=self._addons + ): await self.push_error(error_msg=f"Unable to connect to Deepgram") else: headers = { diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 99f6cf487..95242ade6 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -14,6 +14,7 @@ languages, and various Deepgram features. import asyncio import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -31,6 +32,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -47,6 +49,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class DeepgramSageMakerSTTSettings(STTSettings): + """Typed settings for the Deepgram SageMaker STT service. + + Parameters: + live_options: Deepgram ``LiveOptions`` for detailed configuration. + """ + + live_options: LiveOptions = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramSageMakerSTTService(STTService): """Deepgram speech-to-text service for AWS SageMaker. 
@@ -129,7 +142,12 @@ class DeepgramSageMakerSTTService(STTService): merged_options["language"] = merged_options["language"].value self.set_model_name(merged_options["model"]) - self._settings = merged_options + merged_live_options = LiveOptions(**merged_options) + self._settings: DeepgramSageMakerSTTSettings = DeepgramSageMakerSTTSettings( + model=merged_options.get("model"), + language=merged_options.get("language"), + live_options=merged_live_options, + ) self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None @@ -143,35 +161,40 @@ class DeepgramSageMakerSTTService(STTService): """ return True - async def set_model(self, model: str): - """Set the Deepgram model and reconnect. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, keeping ``live_options`` in sync. - Disconnects from the current session, updates the model setting, and - establishes a new connection with the updated model. + Top-level ``model`` and ``language`` are the source of truth. When + they change, their values are propagated into ``live_options``. - Args: - model: The Deepgram model name to use (e.g., "nova-3"). + Any change triggers a reconnect. """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._settings["model"] = model - await self._disconnect() - await self._connect() - - async def set_language(self, language: Language): - """Set the recognition language and reconnect. - - Disconnects from the current session, updates the language setting, and - establishes a new connection with the updated language. - - Args: - language: The language to use for speech recognition (e.g., Language.EN, - Language.ES).
- """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language + model_given = isinstance(update, DeepgramSageMakerSTTSettings) and is_given( + getattr(update, "model", NOT_GIVEN) + ) + language_given = isinstance(update, DeepgramSageMakerSTTSettings) and is_given( + getattr(update, "language", NOT_GIVEN) + ) + + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + # Sync model into live_options + if model_given and "model" in changed: + self._settings.live_options.model = self._settings.model + + # Sync language into live_options + if language_given and "language" in changed: + lang = self._settings.language + if isinstance(lang, Language): + lang = lang.value + self._settings.live_options.language = lang + await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): """Start the Deepgram SageMaker STT service. @@ -180,7 +203,7 @@ class DeepgramSageMakerSTTService(STTService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.live_options.sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -226,12 +249,12 @@ class DeepgramSageMakerSTTService(STTService): """ logger.debug("Connecting to Deepgram on SageMaker...") - # Update sample rate in settings - self._settings["sample_rate"] = self.sample_rate + # Update sample rate in live_options + self._settings.live_options.sample_rate = self.sample_rate - # Build query string from settings, converting booleans to strings + # Build query string from live_options, converting booleans to strings query_params = {} - for key, value in self._settings.items(): + for key, value in self._settings.live_options.to_dict().items(): if value is not None: # Convert boolean values to lowercase strings for Deepgram API if isinstance(value, bool): diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 12aba4905..4c698dcea 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -11,6 +11,7 @@ for generating speech from text using various voice models. """ import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -29,6 +30,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService, WebsocketTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -43,6 +45,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class DeepgramTTSSettings(TTSSettings): + """Typed settings for Deepgram TTS service. + + Parameters: + encoding: Audio encoding format (linear16, mulaw, alaw). 
+ """ + + encoding: str = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramTTSService(WebsocketTTSService): """Deepgram WebSocket-based text-to-speech service. @@ -91,10 +104,12 @@ class DeepgramTTSService(WebsocketTTSService): self._api_key = api_key self._base_url = base_url - self._settings = { - "encoding": encoding, - } - self.set_voice(voice) + self._settings: DeepgramTTSSettings = DeepgramTTSSettings( + model=voice, + voice=voice, + encoding=encoding, + ) + self._voice_id = voice self._receive_task = None self._context_id: Optional[str] = None @@ -177,7 +192,7 @@ class DeepgramTTSService(WebsocketTTSService): # Build WebSocket URL with query parameters params = [] params.append(f"model={self._voice_id}") - params.append(f"encoding={self._settings['encoding']}") + params.append(f"encoding={self._settings.encoding}") params.append(f"sample_rate={self.sample_rate}") url = f"{self._base_url}/v1/speak?{'&'.join(params)}" @@ -357,10 +372,12 @@ class DeepgramHttpTTSService(TTSService): self._api_key = api_key self._session = aiohttp_session self._base_url = base_url - self._settings = { - "encoding": encoding, - } - self.set_voice(voice) + self._settings: DeepgramTTSSettings = DeepgramTTSSettings( + model=voice, + voice=voice, + encoding=encoding, + ) + self._voice_id = voice def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. 
@@ -390,7 +407,7 @@ class DeepgramHttpTTSService(TTSService): params = { "model": self._voice_id, - "encoding": self._settings["encoding"], + "encoding": self._settings.encoding, "sample_rate": self.sample_rate, "container": "none", } diff --git a/src/pipecat/services/deepseek/llm.py b/src/pipecat/services/deepseek/llm.py index 56f1ddd18..806dce13d 100644 --- a/src/pipecat/services/deepseek/llm.py +++ b/src/pipecat/services/deepseek/llm.py @@ -68,15 +68,15 @@ class DeepSeekLLMService(OpenAILLMService): "model": self.model_name, "stream": True, "stream_options": {"include_usage": True}, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 388f7146b..950dc5de9 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -14,6 +14,7 @@ transcription results directly. 
import base64 import io import json +from dataclasses import dataclass, field from enum import Enum from typing import AsyncGenerator, Optional @@ -33,6 +34,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import ELEVENLABS_REALTIME_TTFS_P99, ELEVENLABS_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -167,6 +169,44 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class ElevenLabsSTTSettings(STTSettings): + """Typed settings for the ElevenLabs file-based STT service. + + Parameters: + tag_audio_events: Whether to include audio event tags in transcription. + """ + + tag_audio_events: bool = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class ElevenLabsRealtimeSTTSettings(STTSettings): + """Typed settings for the ElevenLabs Realtime STT service. + + See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions. + + Parameters: + commit_strategy: How to segment speech - manual (Pipecat VAD) or vad (ElevenLabs VAD). + vad_silence_threshold_secs: Seconds of silence before VAD commits (0.3-3.0). + vad_threshold: VAD sensitivity (0.1-0.9, lower is more sensitive). + min_speech_duration_ms: Minimum speech duration for VAD (50-2000ms). + min_silence_duration_ms: Minimum silence duration for VAD (50-2000ms). + include_timestamps: Whether to include word-level timestamps in transcripts. + enable_logging: Whether to enable logging on ElevenLabs' side. + include_language_detection: Whether to include language detection in transcripts. 
+ """ + + commit_strategy: CommitStrategy = field(default_factory=lambda: NOT_GIVEN) + vad_silence_threshold_secs: float = field(default_factory=lambda: NOT_GIVEN) + vad_threshold: float = field(default_factory=lambda: NOT_GIVEN) + min_speech_duration_ms: int = field(default_factory=lambda: NOT_GIVEN) + min_silence_duration_ms: int = field(default_factory=lambda: NOT_GIVEN) + include_timestamps: bool = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool = field(default_factory=lambda: NOT_GIVEN) + include_language_detection: bool = field(default_factory=lambda: NOT_GIVEN) + + class ElevenLabsSTTService(SegmentedSTTService): """Speech-to-text service using ElevenLabs' file-based API. @@ -223,13 +263,15 @@ class ElevenLabsSTTService(SegmentedSTTService): self._base_url = base_url self._session = aiohttp_session self._model_id = model - self._tag_audio_events = params.tag_audio_events - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: ElevenLabsSTTSettings = ElevenLabsSTTSettings( + model=model, + language=self.language_to_service_language(params.language) if params.language else "eng", - } + tag_audio_events=params.tag_audio_events, + ) + self.set_model_name(model) def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -250,27 +292,30 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def set_language(self, language: Language): - """Set the transcription language. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update. + + Converts language to ElevenLabs format before applying and keeps + ``_model_id`` in sync with the model setting. Args: - language: The language to use for speech-to-text transcription. + update: A :class:`STTSettings` (or ``ElevenLabsSTTSettings``) delta. + + Returns: + Set of field names whose values actually changed. 
""" - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = self.language_to_service_language(language) + # Convert language to ElevenLabs format before applying + if is_given(update.language) and isinstance(update.language, Language): + converted = self.language_to_service_language(update.language) + if converted is not None: + update.language = converted - async def set_model(self, model: str): - """Set the STT model. + changed = await super()._update_settings_from_typed(update) - Args: - model: The model name to use for transcription. + if "model" in changed: + self._model_id = self._settings.model - Note: - ElevenLabs STT API does not currently support model selection. - This method is provided for interface compatibility. - """ - await super().set_model(model) - logger.info(f"Model setting [{model}] noted, but ElevenLabs STT uses default model") + return changed async def _transcribe_audio(self, audio_data: bytes) -> dict: """Upload audio data to ElevenLabs and get transcription result. 
@@ -298,8 +343,8 @@ class ElevenLabsSTTService(SegmentedSTTService): # Add required model_id, language_code, and tag_audio_events data.add_field("model_id", self._model_id) - data.add_field("language_code", self._settings["language"]) - data.add_field("tag_audio_events", str(self._tag_audio_events).lower()) + data.add_field("language_code", self._settings.language) + data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower()) async with self._session.post(url, data=data, headers=headers) as response: if response.status != 200: @@ -469,11 +514,22 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): self._api_key = api_key self._base_url = base_url self._model_id = model - self._params = params self._audio_format = "" # initialized in start() self._receive_task = None - self._settings = {"language": params.language_code} + self._settings: ElevenLabsRealtimeSTTSettings = ElevenLabsRealtimeSTTSettings( + model=model, + language=params.language_code, + commit_strategy=params.commit_strategy, + vad_silence_threshold_secs=params.vad_silence_threshold_secs, + vad_threshold=params.vad_threshold, + min_speech_duration_ms=params.min_speech_duration_ms, + min_silence_duration_ms=params.min_silence_duration_ms, + include_timestamps=params.include_timestamps, + enable_logging=params.enable_logging, + include_language_detection=params.include_language_detection, + ) + self.set_model_name(model) def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -483,42 +539,35 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the transcription language. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update and reconnect if anything changed. + + Converts language to ElevenLabs format before applying and keeps + ``_model_id`` in sync. 
Args: - language: The language to use for speech-to-text transcription. + update: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. - Note: - Changing language requires reconnecting to the WebSocket. + Returns: + Set of field names whose values actually changed. """ - logger.info(f"Switching STT language to: [{language}]") - new_language = ( - language_to_elevenlabs_language(language) - if isinstance(language, Language) - else language - ) - self._params.language_code = new_language - self._settings["language"] = new_language - # Reconnect with new settings - await self._disconnect() - await self._connect() - - async def set_model(self, model: str): - """Set the STT model. - - Args: - model: The model name to use for transcription. - - Note: - Changing model requires reconnecting to the WebSocket. - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._model_id = model - # Reconnect with new settings + # Convert language to ElevenLabs format before applying + if is_given(update.language) and isinstance(update.language, Language): + converted = language_to_elevenlabs_language(update.language) + if converted is not None: + update.language = converted + + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + if "model" in changed: + self._model_id = self._settings.model + await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): """Start the STT service and establish WebSocket connection. 
@@ -566,7 +615,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._start_metrics() elif isinstance(frame, VADUserStoppedSpeakingFrame): # Send commit when user stops speaking (manual commit mode) - if self._params.commit_strategy == CommitStrategy.MANUAL: + if self._settings.commit_strategy == CommitStrategy.MANUAL: if self._websocket and self._websocket.state is State.OPEN: try: commit_message = { @@ -656,36 +705,40 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): # Build query parameters params = [f"model_id={self._model_id}"] - if self._params.language_code: - params.append(f"language_code={self._params.language_code}") + if self._settings.language: + params.append(f"language_code={self._settings.language}") params.append(f"audio_format={self._audio_format}") - params.append(f"commit_strategy={self._params.commit_strategy.value}") + params.append(f"commit_strategy={self._settings.commit_strategy.value}") # Add optional parameters - if self._params.include_timestamps: - params.append(f"include_timestamps={str(self._params.include_timestamps).lower()}") - - if self._params.enable_logging: - params.append(f"enable_logging={str(self._params.enable_logging).lower()}") - - if self._params.include_language_detection: + if self._settings.include_timestamps: params.append( - f"include_language_detection={str(self._params.include_language_detection).lower()}" + f"include_timestamps={str(self._settings.include_timestamps).lower()}" + ) + + if self._settings.enable_logging: + params.append(f"enable_logging={str(self._settings.enable_logging).lower()}") + + if self._settings.include_language_detection: + params.append( + f"include_language_detection={str(self._settings.include_language_detection).lower()}" ) # Add VAD parameters if using VAD commit strategy and values are specified - if self._params.commit_strategy == CommitStrategy.VAD: - if self._params.vad_silence_threshold_secs is not None: + if self._settings.commit_strategy == 
CommitStrategy.VAD: + if self._settings.vad_silence_threshold_secs is not None: params.append( - f"vad_silence_threshold_secs={self._params.vad_silence_threshold_secs}" + f"vad_silence_threshold_secs={self._settings.vad_silence_threshold_secs}" + ) + if self._settings.vad_threshold is not None: + params.append(f"vad_threshold={self._settings.vad_threshold}") + if self._settings.min_speech_duration_ms is not None: + params.append(f"min_speech_duration_ms={self._settings.min_speech_duration_ms}") + if self._settings.min_silence_duration_ms is not None: + params.append( + f"min_silence_duration_ms={self._settings.min_silence_duration_ms}" ) - if self._params.vad_threshold is not None: - params.append(f"vad_threshold={self._params.vad_threshold}") - if self._params.min_speech_duration_ms is not None: - params.append(f"min_speech_duration_ms={self._params.min_speech_duration_ms}") - if self._params.min_silence_duration_ms is not None: - params.append(f"min_silence_duration_ms={self._params.min_silence_duration_ms}") ws_url = f"wss://{self._base_url}/v1/speech-to-text/realtime?{'&'.join(params)}" @@ -817,7 +870,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ # If timestamps are enabled, skip this message and wait for the # committed_transcript_with_timestamps message which contains all the data - if self._params.include_timestamps: + if self._settings.include_timestamps: return text = data.get("text", "").strip() diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 4dab0c01a..b061383f3 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -13,7 +13,19 @@ with support for streaming audio, word timestamps, and voice customization. 
import asyncio import base64 import json -from typing import Any, AsyncGenerator, Dict, List, Literal, Mapping, Optional, Tuple, Union +from dataclasses import dataclass, field +from typing import ( + Any, + AsyncGenerator, + ClassVar, + Dict, + List, + Literal, + Mapping, + Optional, + Tuple, + Union, +) import aiohttp from loguru import logger @@ -32,6 +44,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import ( AudioContextWordTTSService, WordTTSService, @@ -136,12 +149,12 @@ def output_format_from_sample_rate(sample_rate: int) -> str: def build_elevenlabs_voice_settings( - settings: Dict[str, Any], + settings: Union[Dict[str, Any], "TTSSettings"], ) -> Optional[Dict[str, Union[float, bool]]]: """Build voice settings dictionary for ElevenLabs based on provided settings. Args: - settings: Dictionary containing voice settings parameters. + settings: Dictionary or typed settings containing voice settings parameters. Returns: Dictionary of voice settings or None if no valid settings are provided. @@ -150,8 +163,11 @@ def build_elevenlabs_voice_settings( voice_settings = {} for key in voice_setting_keys: - if key in settings and settings[key] is not None: - voice_settings[key] = settings[key] + val = ( + getattr(settings, key, None) if isinstance(settings, TTSSettings) else settings.get(key) + ) + if val is not None and is_given(val): + voice_settings[key] = val return voice_settings or None @@ -168,6 +184,75 @@ class PronunciationDictionaryLocator(BaseModel): version_id: str +@dataclass +class ElevenLabsTTSSettings(TTSSettings): + """Typed settings for the ElevenLabs WebSocket TTS service. + + Fields that appear in the WebSocket URL (``voice``, ``model``, + ``language``) require a full reconnect when changed. 
Fields that + affect the voice character (``stability``, ``similarity_boost``, + ``style``, ``use_speaker_boost``, ``speed``) can be applied by closing + the current audio context so a new one is opened with updated settings. + + Parameters: + stability: Voice stability control (0.0 to 1.0). + similarity_boost: Similarity boost control (0.0 to 1.0). + style: Style control for voice expression (0.0 to 1.0). + use_speaker_boost: Whether to use speaker boost enhancement. + speed: Voice speed control (0.7 to 1.2). + auto_mode: Automatic mode optimization flag, stored as a lowercase string. + enable_ssml_parsing: Whether to parse SSML tags in text. + enable_logging: Whether to enable ElevenLabs logging. + apply_text_normalization: Text normalization mode ("auto", "on", "off"). + """ + + stability: float = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float = field(default_factory=lambda: NOT_GIVEN) + style: float = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + auto_mode: str = field(default_factory=lambda: NOT_GIVEN) + enable_ssml_parsing: bool = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + + #: Fields in the WS URL — changing any of these requires a reconnect. + URL_FIELDS: ClassVar[frozenset[str]] = frozenset({"voice", "model", "language"}) + + #: Fields affecting voice character — changing these requires closing the + #: current audio context so the next one picks up new settings. + VOICE_SETTINGS_FIELDS: ClassVar[frozenset[str]] = frozenset( + {"stability", "similarity_boost", "style", "use_speaker_boost", "speed"} + ) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + +@dataclass +class ElevenLabsHttpTTSSettings(TTSSettings): + """Typed settings for the ElevenLabs HTTP TTS service.
+ + Parameters: + optimize_streaming_latency: Latency optimization level (0-4). + stability: Voice stability control (0.0 to 1.0). + similarity_boost: Similarity boost control (0.0 to 1.0). + style: Style control for voice expression (0.0 to 1.0). + use_speaker_boost: Whether to use speaker boost enhancement. + speed: Voice speed control (0.25 to 4.0). + apply_text_normalization: Text normalization mode ("auto", "on", "off"). + """ + + optimize_streaming_latency: int = field(default_factory=lambda: NOT_GIVEN) + stability: float = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float = field(default_factory=lambda: NOT_GIVEN) + style: float = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + def calculate_word_times( alignment_info: Mapping[str, Any], cumulative_time: float, @@ -316,22 +401,25 @@ class ElevenLabsTTSService(AudioContextWordTTSService): self._api_key = api_key self._url = url - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else None, - "stability": params.stability, - "similarity_boost": params.similarity_boost, - "style": params.style, - "use_speaker_boost": params.use_speaker_boost, - "speed": params.speed, - "auto_mode": str(params.auto_mode).lower(), - "enable_ssml_parsing": params.enable_ssml_parsing, - "enable_logging": params.enable_logging, - "apply_text_normalization": params.apply_text_normalization, - } + self._settings: ElevenLabsTTSSettings = ElevenLabsTTSSettings( + model=model, + voice=voice_id, + language=( + self.language_to_service_language(params.language) if params.language else None + ), + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + 
use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + auto_mode=str(params.auto_mode).lower(), + enable_ssml_parsing=params.enable_ssml_parsing, + enable_logging=params.enable_logging, + apply_text_normalization=params.apply_text_normalization, + ) self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id + self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -366,54 +454,57 @@ class ElevenLabsTTSService(AudioContextWordTTSService): return language_to_elevenlabs_language(language) def _set_voice_settings(self): - return build_elevenlabs_voice_settings(self._settings) + ts = self._settings + voice_setting_keys = [ + "stability", + "similarity_boost", + "style", + "use_speaker_boost", + "speed", + ] + voice_settings = {} + for key in voice_setting_keys: + val = getattr(ts, key, None) + if val is not None and is_given(val): + voice_settings[key] = val + return voice_settings or None - async def set_model(self, model: str): - """Set the TTS model and reconnect. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update, reconnecting as needed. + + Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS`` + sets on :class:`ElevenLabsTTSSettings` to decide whether to + reconnect the WebSocket or close the current audio context. Args: - model: The model name to use for synthesis. + update: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta. + + Returns: + Set of field names whose values actually changed. 
""" - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - await self._disconnect() - await self._connect() + changed = await super()._update_settings_from_typed(update) - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice, model, or language changed.""" - # Track previous values for settings that require reconnection - prev_voice = self._voice_id - prev_model = self.model_name - prev_language = self._settings.get("language") - # Create snapshot of current voice settings to detect changes after update - prev_voice_settings = self._voice_settings.copy() if self._voice_settings else None + if not changed: + return changed - await super()._update_settings(settings) - - # Update voice settings for the next context creation + # Rebuild voice settings for next context self._voice_settings = self._set_voice_settings() - # Check if URL-level settings changed (these require reconnection) - url_changed = ( - prev_voice != self._voice_id - or prev_model != self.model_name - or prev_language != self._settings.get("language") - ) - - # Check if only voice settings changed (speed, stability, etc.) 
- voice_settings_changed = prev_voice_settings != self._voice_settings + url_changed = bool(changed & ElevenLabsTTSSettings.URL_FIELDS) + voice_settings_changed = bool(changed & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS) if url_changed: - # These settings are in the WebSocket URL, so we need to reconnect logger.debug( - f"URL-level setting changed (voice/model/language), reconnecting WebSocket" + f"URL-level setting changed ({changed & ElevenLabsTTSSettings.URL_FIELDS}), " + f"reconnecting WebSocket" ) await self._disconnect() await self._connect() elif voice_settings_changed and self._context_id: - # Voice settings can be updated by closing current context - # so new one gets created with updated voice settings - logger.debug(f"Voice settings changed, closing current context to apply changes") + logger.debug( + f"Voice settings changed ({changed & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS}), " + f"closing current context to apply changes" + ) try: if self._websocket: await self._websocket.send( @@ -423,6 +514,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService): await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) self._context_id = None + return changed + async def start(self, frame: StartFrame): """Start the ElevenLabs TTS service. 
@@ -505,19 +598,19 @@ class ElevenLabsTTSService(AudioContextWordTTSService): voice_id = self._voice_id model = self.model_name output_format = self._output_format - url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings['auto_mode']}" + url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings.auto_mode}" - if self._settings["enable_ssml_parsing"]: - url += f"&enable_ssml_parsing={self._settings['enable_ssml_parsing']}" + if self._settings.enable_ssml_parsing: + url += f"&enable_ssml_parsing={self._settings.enable_ssml_parsing}" - if self._settings["enable_logging"]: - url += f"&enable_logging={self._settings['enable_logging']}" + if self._settings.enable_logging: + url += f"&enable_logging={self._settings.enable_logging}" - if self._settings["apply_text_normalization"] is not None: - url += f"&apply_text_normalization={self._settings['apply_text_normalization']}" + if self._settings.apply_text_normalization is not None: + url += f"&apply_text_normalization={self._settings.apply_text_normalization}" # Language can only be used with the ELEVENLABS_MULTILINGUAL_MODELS - language = self._settings["language"] + language = self._settings.language if model in ELEVENLABS_MULTILINGUAL_MODELS and language is not None: url += f"&language_code={language}" logger.debug(f"Using language code: {language}") @@ -809,20 +902,22 @@ class ElevenLabsHttpTTSService(WordTTSService): self._params = params self._session = aiohttp_session - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: ElevenLabsHttpTTSSettings = ElevenLabsHttpTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(params.language) if params.language else None, - "optimize_streaming_latency": params.optimize_streaming_latency, - "stability": params.stability, - 
"similarity_boost": params.similarity_boost, - "style": params.style, - "use_speaker_boost": params.use_speaker_boost, - "speed": params.speed, - "apply_text_normalization": params.apply_text_normalization, - } + optimize_streaming_latency=params.optimize_streaming_latency, + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + apply_text_normalization=params.apply_text_normalization, + ) self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -859,10 +954,19 @@ class ElevenLabsHttpTTSService(WordTTSService): def _set_voice_settings(self): return build_elevenlabs_voice_settings(self._settings) - async def _update_settings(self, settings: Mapping[str, Any]): - await super()._update_settings(settings) - # Update voice settings for the next context creation - self._voice_settings = self._set_voice_settings() + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and rebuild voice settings. + + Args: + update: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + changed = await super()._update_settings_from_typed(update) + if changed: + self._voice_settings = self._set_voice_settings() + return changed def _reset_state(self): """Reset internal state variables.""" @@ -999,10 +1103,13 @@ class ElevenLabsHttpTTSService(WordTTSService): locator.model_dump() for locator in self._pronunciation_dictionary_locators ] - if self._settings["apply_text_normalization"] is not None: - payload["apply_text_normalization"] = self._settings["apply_text_normalization"] + if ( + is_given(self._settings.apply_text_normalization) + and self._settings.apply_text_normalization is not None + ): + payload["apply_text_normalization"] = self._settings.apply_text_normalization - language = self._settings["language"] + language = self._settings.language if self._model_name in ELEVENLABS_MULTILINGUAL_MODELS and language: payload["language_code"] = language logger.debug(f"Using language code: {language}") @@ -1020,8 +1127,11 @@ class ElevenLabsHttpTTSService(WordTTSService): params = { "output_format": self._output_format, } - if self._settings["optimize_streaming_latency"] is not None: - params["optimize_streaming_latency"] = self._settings["optimize_streaming_latency"] + if ( + is_given(self._settings.optimize_streaming_latency) + and self._settings.optimize_streaming_latency is not None + ): + params["optimize_streaming_latency"] = self._settings.optimize_streaming_latency try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 4e8a655ec..eef0e0487 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -11,12 +11,14 @@ transcription using segmented audio processing. 
""" import os +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import FAL_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -146,6 +148,22 @@ def language_to_fal_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class FalSTTSettings(STTSettings): + """Typed settings for the Fal Wizper STT service. + + Parameters: + task: Task to perform ('transcribe' or 'translate'). Defaults to + 'transcribe'. + chunk_level: Level of chunking ('segment'). Defaults to 'segment'. + version: Version of Wizper model to use. Defaults to '3'. + """ + + task: str = field(default_factory=lambda: NOT_GIVEN) + chunk_level: str = field(default_factory=lambda: NOT_GIVEN) + version: str = field(default_factory=lambda: NOT_GIVEN) + + class FalSTTService(SegmentedSTTService): """Speech-to-text service using Fal's Wizper API. @@ -203,14 +221,14 @@ class FalSTTService(SegmentedSTTService): ) self._fal_client = fal_client.AsyncClient(key=api_key or os.getenv("FAL_KEY")) - self._settings = { - "task": params.task, - "language": self.language_to_service_language(params.language) + self._settings: FalSTTSettings = FalSTTSettings( + language=self.language_to_service_language(params.language) if params.language else "en", - "chunk_level": params.chunk_level, - "version": params.version, - } + task=params.task, + chunk_level=params.chunk_level, + version=params.version, + ) def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. 
@@ -231,23 +249,17 @@ class FalSTTService(SegmentedSTTService): """ return language_to_fal_language(language) - async def set_language(self, language: Language): - """Set the transcription language. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, converting language if changed.""" + changed = await super()._update_settings_from_typed(update) - Args: - language: The language to use for speech-to-text transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = self.language_to_service_language(language) + if "language" in changed: + # Convert the Language enum to a Fal language code. + lang = self._settings.language + if isinstance(lang, Language): + self._settings.language = self.language_to_service_language(lang) - async def set_model(self, model: str): - """Set the STT model. - - Args: - model: The model name to use for transcription. - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") + return changed @traced_stt async def _handle_transcription( @@ -276,19 +288,19 @@ class FalSTTService(SegmentedSTTService): data_uri = fal_client.encode(audio, "audio/x-wav") response = await self._fal_client.run( "fal-ai/wizper", - arguments={"audio_url": data_uri, **self._settings}, + arguments={"audio_url": data_uri, **self._settings.given_fields()}, ) if response and "text" in response: text = response["text"].strip() if text: # Only yield non-empty text - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - Language(self._settings["language"]), + Language(self._settings.language), result=response, ) diff --git a/src/pipecat/services/fireworks/llm.py b/src/pipecat/services/fireworks/llm.py index d7bf57908..9338d8c5a 100644 
--- a/src/pipecat/services/fireworks/llm.py +++ b/src/pipecat/services/fireworks/llm.py @@ -68,15 +68,15 @@ class FireworksLLMService(OpenAILLMService): params = { "model": self.model_name, "stream": True, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 93a718429..5517758ad 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -11,6 +11,7 @@ for streaming text-to-speech synthesis with customizable voice parameters. """ import uuid +from dataclasses import dataclass, field from typing import AsyncGenerator, Literal, Optional from loguru import logger @@ -28,6 +29,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -45,6 +47,29 @@ except ModuleNotFoundError as e: FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"] +@dataclass +class FishAudioTTSSettings(TTSSettings): + """Typed settings for Fish Audio TTS service. + + Parameters: + fish_sample_rate: Audio sample rate sent to the API. + latency: Latency mode ("normal" or "balanced"). 
Defaults to "normal". + format: Audio output format. + normalize: Whether to normalize audio output. Defaults to True. + prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0. + prosody_volume: Volume adjustment in dB. Defaults to 0. + reference_id: Reference ID of the voice model. + """ + + fish_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + latency: str = field(default_factory=lambda: NOT_GIVEN) + format: str = field(default_factory=lambda: NOT_GIVEN) + normalize: bool = field(default_factory=lambda: NOT_GIVEN) + prosody_speed: float = field(default_factory=lambda: NOT_GIVEN) + prosody_volume: int = field(default_factory=lambda: NOT_GIVEN) + reference_id: str = field(default_factory=lambda: NOT_GIVEN) + + class FishAudioTTSService(InterruptibleTTSService): """Fish Audio text-to-speech service with WebSocket streaming. @@ -136,17 +161,16 @@ class FishAudioTTSService(InterruptibleTTSService): self._receive_task = None self._request_id = None - self._settings = { - "sample_rate": 0, - "latency": params.latency, - "format": output_format, - "normalize": params.normalize, - "prosody": { - "speed": params.prosody_speed, - "volume": params.prosody_volume, - }, - "reference_id": reference_id, - } + self._settings: FishAudioTTSSettings = FishAudioTTSSettings( + voice=reference_id, + fish_sample_rate=0, + latency=params.latency, + format=output_format, + normalize=params.normalize, + prosody_speed=params.prosody_speed, + prosody_volume=params.prosody_volume, + reference_id=reference_id, + ) self.set_model_name(model_id) @@ -158,16 +182,22 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model and reconnect. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and reconnect if needed. + + Any change to voice or model triggers a WebSocket reconnect. Args: - model: The model name to use for synthesis. 
+ update: A :class:`TTSSettings` (or ``FishAudioTTSSettings``) delta. + + Returns: + Set of field names whose values actually changed. """ - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - await self._disconnect() - await self._connect() + changed = await super()._update_settings_from_typed(update) + if changed: + await self._disconnect() + await self._connect() + return changed async def start(self, frame: StartFrame): """Start the Fish Audio TTS service. @@ -176,7 +206,7 @@ class FishAudioTTSService(InterruptibleTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.fish_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -225,7 +255,18 @@ class FishAudioTTSService(InterruptibleTTSService): self._websocket = await websocket_connect(self._base_url, additional_headers=headers) # Send initial start message with ormsgpack - start_message = {"event": "start", "request": {"text": "", **self._settings}} + request_settings = { + "sample_rate": self._settings.fish_sample_rate, + "latency": self._settings.latency, + "format": self._settings.format, + "normalize": self._settings.normalize, + "prosody": { + "speed": self._settings.prosody_speed, + "volume": self._settings.prosody_volume, + }, + "reference_id": self._settings.reference_id, + } + start_message = {"event": "start", "request": {"text": "", **request_settings}} await self._websocket.send(ormsgpack.packb(start_message)) logger.debug("Sent start message to Fish Audio") diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 475a7213e..76a1620e1 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -14,6 +14,7 @@ import asyncio import base64 import json import warnings +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, 
Dict, Literal, Optional import aiohttp @@ -32,6 +33,7 @@ from pipecat.frames.frames import ( UserStoppedSpeakingFrame, ) from pipecat.services.gladia.config import GladiaInputParams +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import GLADIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -178,6 +180,17 @@ class _InputParamsDescriptor: return GladiaInputParams +@dataclass +class GladiaSTTSettings(STTSettings): + """Typed settings for Gladia STT service. + + Parameters: + input_params: Gladia ``GladiaInputParams`` for detailed configuration. + """ + + input_params: GladiaInputParams = field(default_factory=lambda: NOT_GIVEN) + + class GladiaSTTService(WebsocketSTTService): """Speech-to-Text service using Gladia's API. @@ -265,9 +278,8 @@ class GladiaSTTService(WebsocketSTTService): self._region = region self._url = url self.set_model_name(model) - self._params = params self._receive_task = None - self._settings = {} + self._settings = GladiaSTTSettings(model=model, input_params=params) # Session management self._session_url = None @@ -307,31 +319,33 @@ class GladiaSTTService(WebsocketSTTService): return language_to_gladia_language(language) def _prepare_settings(self) -> Dict[str, Any]: + params = self._settings.input_params + settings = { - "encoding": self._params.encoding or "wav/pcm", - "bit_depth": self._params.bit_depth or 16, + "encoding": params.encoding or "wav/pcm", + "bit_depth": params.bit_depth or 16, "sample_rate": self.sample_rate, - "channels": self._params.channels or 1, + "channels": params.channels or 1, "model": self._model_name, } # Add custom_metadata if provided - settings["custom_metadata"] = dict(self._params.custom_metadata or {}) + settings["custom_metadata"] = dict(params.custom_metadata or {}) settings["custom_metadata"]["pipecat"] = pipecat_version() # Add endpointing parameters if provided - 
if self._params.endpointing is not None: - settings["endpointing"] = self._params.endpointing - if self._params.maximum_duration_without_endpointing is not None: + if params.endpointing is not None: + settings["endpointing"] = params.endpointing + if params.maximum_duration_without_endpointing is not None: settings["maximum_duration_without_endpointing"] = ( - self._params.maximum_duration_without_endpointing + params.maximum_duration_without_endpointing ) # Add language configuration (prioritize language_config over deprecated language) - if self._params.language_config: - settings["language_config"] = self._params.language_config.model_dump(exclude_none=True) - elif self._params.language: # Backward compatibility for deprecated parameter - language_code = self.language_to_service_language(self._params.language) + if params.language_config: + settings["language_config"] = params.language_config.model_dump(exclude_none=True) + elif params.language: # Backward compatibility for deprecated parameter + language_code = self.language_to_service_language(params.language) if language_code: settings["language_config"] = { "languages": [language_code], @@ -339,21 +353,18 @@ class GladiaSTTService(WebsocketSTTService): } # Add pre_processing configuration if provided - if self._params.pre_processing: - settings["pre_processing"] = self._params.pre_processing.model_dump(exclude_none=True) + if params.pre_processing: + settings["pre_processing"] = params.pre_processing.model_dump(exclude_none=True) # Add realtime_processing configuration if provided - if self._params.realtime_processing: - settings["realtime_processing"] = self._params.realtime_processing.model_dump( + if params.realtime_processing: + settings["realtime_processing"] = params.realtime_processing.model_dump( exclude_none=True ) # Add messages_config if provided - if self._params.messages_config: - settings["messages_config"] = self._params.messages_config.model_dump(exclude_none=True) - - # Store settings for 
tracing - self._settings = settings + if params.messages_config: + settings["messages_config"] = params.messages_config.model_dump(exclude_none=True) return settings @@ -366,6 +377,31 @@ class GladiaSTTService(WebsocketSTTService): await super().start(frame) await self._connect() + async def _update_settings_from_typed(self, update: GladiaSTTSettings) -> set[str]: + """Apply typed settings update. + + Gladia sessions are fixed at creation time, so any change requires + a full session teardown and reconnect. + + Args: + update: A typed settings delta. + + Returns: + Set of field names whose values actually changed. + """ + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + # Gladia sessions are fixed — need to tear down and recreate + self._session_url = None + self._session_id = None + await self._disconnect() + await self._connect() + + return changed + async def stop(self, frame: EndFrame): """Stop the Gladia STT websocket connection. @@ -522,7 +558,7 @@ class GladiaSTTService(WebsocketSTTService): Broadcasts UserStartedSpeakingFrame and optionally triggers interruption when VAD is enabled. """ - if not self._params.enable_vad or self._is_speaking: + if not self._settings.input_params.enable_vad or self._is_speaking: return logger.debug(f"{self} User started speaking") @@ -537,7 +573,7 @@ class GladiaSTTService(WebsocketSTTService): Broadcasts UserStoppedSpeakingFrame when VAD is enabled. 
""" - if not self._params.enable_vad or not self._is_speaking: + if not self._settings.input_params.enable_vad or not self._is_speaking: return self._is_speaking = False await self.broadcast_frame(UserStoppedSpeakingFrame) diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index e209f3d0a..1edab5783 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -17,9 +17,9 @@ import io import time import uuid import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, ClassVar, Dict, List, Optional, Union from loguru import logger from PIL import Image @@ -47,7 +47,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, StartFrame, TranscriptionFrame, TTSAudioRawFrame, @@ -77,6 +76,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.string import match_endofsentence from pipecat.utils.time import time_now_iso8601 @@ -602,6 +602,31 @@ class InputParams(BaseModel): extra: Optional[Dict[str, Any]] = Field(default_factory=dict) +@dataclass +class GeminiLiveLLMSettings(LLMSettings): + """Typed settings for Gemini Live LLM services. + + Parameters: + modalities: Response modalities. + language: Language for generation. + media_resolution: Media resolution setting. + vad: Voice activity detection parameters. + context_window_compression: Context window compression configuration. + thinking: Thinking configuration. + enable_affective_dialog: Whether to enable affective dialog. + proactivity: Proactivity configuration. 
+ """ + + modalities: Any = field(default_factory=lambda: NOT_GIVEN) + language: Any = field(default_factory=lambda: NOT_GIVEN) + media_resolution: Any = field(default_factory=lambda: NOT_GIVEN) + vad: Any = field(default_factory=lambda: NOT_GIVEN) + context_window_compression: Any = field(default_factory=lambda: NOT_GIVEN) + thinking: Any = field(default_factory=lambda: NOT_GIVEN) + enable_affective_dialog: Any = field(default_factory=lambda: NOT_GIVEN) + proactivity: Any = field(default_factory=lambda: NOT_GIVEN) + + class GeminiLiveLLMService(LLMService): """Provides access to Google's Gemini Live API. @@ -714,25 +739,26 @@ class GeminiLiveLLMService(LLMService): self._consecutive_failures = 0 self._connection_start_time = None - self._settings = { - "frequency_penalty": params.frequency_penalty, - "max_tokens": params.max_tokens, - "presence_penalty": params.presence_penalty, - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "modalities": params.modalities, - "language": self._language_code, - "media_resolution": params.media_resolution, - "vad": params.vad, - "context_window_compression": params.context_window_compression.model_dump() + self._settings = GeminiLiveLLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + max_tokens=params.max_tokens, + presence_penalty=params.presence_penalty, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + modalities=params.modalities, + language=self._language_code, + media_resolution=params.media_resolution, + vad=params.vad, + context_window_compression=params.context_window_compression.model_dump() if params.context_window_compression else {}, - "thinking": params.thinking or {}, - "enable_affective_dialog": params.enable_affective_dialog or False, - "proactivity": params.proactivity or {}, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } + thinking=params.thinking or {}, + 
enable_affective_dialog=params.enable_affective_dialog or False, + proactivity=params.proactivity or {}, + extra=params.extra if isinstance(params.extra, dict) else {}, + ) self._file_api_base_url = file_api_base_url self._file_api: Optional[GeminiFileAPI] = None @@ -798,7 +824,7 @@ class GeminiLiveLLMService(LLMService): Args: modalities: The modalities to use for responses. """ - self._settings["modalities"] = modalities + self._settings.modalities = modalities def set_language(self, language: Language): """Set the language for generation. @@ -808,7 +834,7 @@ class GeminiLiveLLMService(LLMService): """ self._language = language self._language_code = language_to_gemini_language(language) or "en-US" - self._settings["language"] = self._language_code + self._settings.language = self._language_code logger.info(f"Set Gemini language to: {self._language_code}") async def set_context(self, context: OpenAILLMContext): @@ -866,7 +892,7 @@ class GeminiLiveLLMService(LLMService): async def _handle_interruption(self): if self._bot_is_responding: await self._set_bot_is_responding(False) - if self._settings.get("modalities") == GeminiModalities.AUDIO: + if self._settings.modalities == GeminiModalities.AUDIO: await self.push_frame(TTSStoppedFrame()) # Do not send LLMFullResponseEndFrame here - an interruption # already tells the assistant context aggregator that the response @@ -947,8 +973,6 @@ class GeminiLiveLLMService(LLMService): # uses this frame *without* a user context aggregator still works # (we have an example that does just that, actually). 
await self._create_single_response(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) elif isinstance(frame, LLMSetToolsFrame): await self._update_settings() else: @@ -1074,20 +1098,20 @@ class GeminiLiveLLMService(LLMService): # Assemble basic configuration config = LiveConnectConfig( generation_config=GenerationConfig( - frequency_penalty=self._settings["frequency_penalty"], - max_output_tokens=self._settings["max_tokens"], - presence_penalty=self._settings["presence_penalty"], - temperature=self._settings["temperature"], - top_k=self._settings["top_k"], - top_p=self._settings["top_p"], - response_modalities=[Modality(self._settings["modalities"].value)], + frequency_penalty=self._settings.frequency_penalty, + max_output_tokens=self._settings.max_tokens, + presence_penalty=self._settings.presence_penalty, + temperature=self._settings.temperature, + top_k=self._settings.top_k, + top_p=self._settings.top_p, + response_modalities=[Modality(self._settings.modalities.value)], speech_config=SpeechConfig( voice_config=VoiceConfig( prebuilt_voice_config={"voice_name": self._voice_id} ), - language_code=self._settings["language"], + language_code=self._settings.language, ), - media_resolution=MediaResolution(self._settings["media_resolution"].value), + media_resolution=MediaResolution(self._settings.media_resolution.value), ), input_audio_transcription=AudioTranscriptionConfig(), output_audio_transcription=AudioTranscriptionConfig(), @@ -1095,37 +1119,36 @@ class GeminiLiveLLMService(LLMService): ) # Add context window compression to configuration, if enabled - if self._settings.get("context_window_compression", {}).get("enabled", False): + cwc = self._settings.context_window_compression or {} + if cwc.get("enabled", False): compression_config = ContextWindowCompressionConfig() # Add sliding window (always true if compression is enabled) compression_config.sliding_window = SlidingWindow() # Add trigger_tokens if 
specified - trigger_tokens = self._settings.get("context_window_compression", {}).get( - "trigger_tokens" - ) + trigger_tokens = cwc.get("trigger_tokens") if trigger_tokens is not None: compression_config.trigger_tokens = trigger_tokens config.context_window_compression = compression_config # Add thinking configuration to configuration, if provided - if self._settings.get("thinking"): - config.thinking_config = self._settings["thinking"] + if self._settings.thinking: + config.thinking_config = self._settings.thinking # Add affective dialog setting, if provided - if self._settings.get("enable_affective_dialog", False): - config.enable_affective_dialog = self._settings["enable_affective_dialog"] + if self._settings.enable_affective_dialog: + config.enable_affective_dialog = self._settings.enable_affective_dialog # Add proactivity configuration to configuration, if provided - if self._settings.get("proactivity"): - config.proactivity = self._settings["proactivity"] + if self._settings.proactivity: + config.proactivity = self._settings.proactivity # Add VAD configuration to configuration, if provided - if self._settings.get("vad"): + if self._settings.vad: vad_config = AutomaticActivityDetection() - vad_params = self._settings["vad"] + vad_params = self._settings.vad has_vad_settings = False # Only add parameters that are explicitly set @@ -1604,7 +1627,7 @@ class GeminiLiveLLMService(LLMService): text: The transcription text to push result: Optional LiveServerMessage that triggered this transcription """ - await self._handle_user_transcription(text, True, self._settings["language"]) + await self._handle_user_transcription(text, True, self._settings.language) await self.push_frame( TranscriptionFrame( text=text, diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 563acadb3..692106241 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -15,8 +15,8 @@ import io import json import os import 
uuid -from dataclasses import dataclass -from typing import Any, AsyncIterator, Dict, List, Literal, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncIterator, ClassVar, Dict, List, Literal, Optional from loguru import logger from PIL import Image @@ -39,7 +39,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -59,6 +58,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.utils.tracing.service_decorators import traced_llm # Suppress gRPC fork warnings @@ -673,6 +673,17 @@ class GoogleLLMContext(OpenAILLMContext): self._messages = [m for m in self._messages if m.parts] +@dataclass +class GoogleLLMSettings(LLMSettings): + """Typed settings for Google LLM services. + + Parameters: + thinking: Thinking configuration. + """ + + thinking: Any = field(default_factory=lambda: NOT_GIVEN) + + class GoogleLLMService(LLMService): """Google AI (Gemini) LLM service implementation. 
@@ -773,14 +784,15 @@ class GoogleLLMService(LLMService): self._system_instruction = system_instruction self._http_options = update_google_client_http_options(http_options) - self._settings = { - "max_tokens": params.max_tokens, - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "thinking": params.thinking, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } + self._settings = GoogleLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ) self._tools = tools self._tool_config = tool_config @@ -874,10 +886,10 @@ class GoogleLLMService(LLMService): k: v for k, v in { "system_instruction": system_instruction, - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "top_k": self._settings["top_k"], - "max_output_tokens": self._settings["max_tokens"], + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "top_k": self._settings.top_k, + "max_output_tokens": self._settings.max_tokens, "tools": tools, "tool_config": tool_config, }.items() @@ -885,13 +897,13 @@ class GoogleLLMService(LLMService): } # Add thinking parameters if configured - if self._settings["thinking"]: - generation_params["thinking_config"] = self._settings["thinking"].model_dump( + if self._settings.thinking: + generation_params["thinking_config"] = self._settings.thinking.model_dump( exclude_unset=True ) - if self._settings["extra"]: - generation_params.update(self._settings["extra"]) + if self._settings.extra: + generation_params.update(self._settings.extra) return generation_params @@ -1190,8 +1202,6 @@ class GoogleLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = GoogleLLMContext(frame.messages) - elif isinstance(frame, 
LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 23396b0b8..8f762da9d 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -15,13 +15,15 @@ import asyncio import json import os import time +import warnings +from dataclasses import dataclass, field from pipecat.utils.tracing.service_decorators import traced_stt # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import AsyncGenerator, List, Optional, Union +from typing import Any, AsyncGenerator, List, Optional, Union from loguru import logger from pydantic import BaseModel, Field, field_validator @@ -34,6 +36,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import GOOGLE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -355,6 +358,44 @@ def language_to_google_stt_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class GoogleSTTSettings(STTSettings): + """Typed settings for Google Cloud Speech-to-Text V2. + + Parameters: + languages: List of ``Language`` enums for recognition + (e.g. ``[Language.EN_US]``). Preferred over ``language_codes``. + language_codes: List of Google STT language code strings + (e.g. ``["en-US"]``). + + .. deprecated:: 0.0.103 + Use ``languages`` instead. If both are provided, ``languages`` + takes precedence. This field is here just for backward + compatibility with dict-based settings updates. + use_separate_recognition_per_channel: Process each audio channel separately. + enable_automatic_punctuation: Add punctuation to transcripts. 
+ enable_spoken_punctuation: Include spoken punctuation in transcript. + enable_spoken_emojis: Include spoken emojis in transcript. + profanity_filter: Filter profanity from transcript. + enable_word_time_offsets: Include timing information for each word. + enable_word_confidence: Include confidence scores for each word. + enable_interim_results: Stream partial recognition results. + enable_voice_activity_events: Detect voice activity in audio. + """ + + languages: Any = field(default_factory=lambda: NOT_GIVEN) + language_codes: Any = field(default_factory=lambda: NOT_GIVEN) + use_separate_recognition_per_channel: Any = field(default_factory=lambda: NOT_GIVEN) + enable_automatic_punctuation: Any = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_punctuation: Any = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_emojis: Any = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: Any = field(default_factory=lambda: NOT_GIVEN) + enable_word_time_offsets: Any = field(default_factory=lambda: NOT_GIVEN) + enable_word_confidence: Any = field(default_factory=lambda: NOT_GIVEN) + enable_interim_results: Any = field(default_factory=lambda: NOT_GIVEN) + enable_voice_activity_events: Any = field(default_factory=lambda: NOT_GIVEN) + + class GoogleSTTService(STTService): """Google Cloud Speech-to-Text V2 service implementation. 
@@ -508,21 +549,19 @@ class GoogleSTTService(STTService): self._client = speech_v2.SpeechAsyncClient(credentials=creds, client_options=client_options) - self._settings = { - "language_codes": [ - self.language_to_service_language(lang) for lang in params.language_list - ], - "model": params.model, - "use_separate_recognition_per_channel": params.use_separate_recognition_per_channel, - "enable_automatic_punctuation": params.enable_automatic_punctuation, - "enable_spoken_punctuation": params.enable_spoken_punctuation, - "enable_spoken_emojis": params.enable_spoken_emojis, - "profanity_filter": params.profanity_filter, - "enable_word_time_offsets": params.enable_word_time_offsets, - "enable_word_confidence": params.enable_word_confidence, - "enable_interim_results": params.enable_interim_results, - "enable_voice_activity_events": params.enable_voice_activity_events, - } + self._settings = GoogleSTTSettings( + languages=list(params.language_list), + model=params.model, + use_separate_recognition_per_channel=params.use_separate_recognition_per_channel, + enable_automatic_punctuation=params.enable_automatic_punctuation, + enable_spoken_punctuation=params.enable_spoken_punctuation, + enable_spoken_emojis=params.enable_spoken_emojis, + profanity_filter=params.profanity_filter, + enable_word_time_offsets=params.enable_word_time_offsets, + enable_word_confidence=params.enable_word_confidence, + enable_interim_results=params.enable_interim_results, + enable_voice_activity_events=params.enable_voice_activity_events, + ) def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. @@ -545,6 +584,23 @@ class GoogleSTTService(STTService): return [language_to_google_stt_language(lang) or "en-US" for lang in language] return language_to_google_stt_language(language) or "en-US" + def _get_language_codes(self) -> List[str]: + """Resolve the current language settings to Google STT language code strings. 
+ + Prefers ``languages`` (``Language`` enums) over the deprecated + ``language_codes`` (raw strings). Falls back to ``["en-US"]``. + + Returns: + List[str]: Google STT language code strings. + """ + from pipecat.services.settings import is_given + + if is_given(self._settings.languages): + return [self.language_to_service_language(lang) for lang in self._settings.languages] + if is_given(self._settings.language_codes): + return list(self._settings.language_codes) + return ["en-US"] + async def _reconnect_if_needed(self): """Reconnect the stream if it's currently active.""" if self._streaming_task: @@ -552,41 +608,65 @@ class GoogleSTTService(STTService): await self._disconnect() await self._connect() - async def set_language(self, language: Language): - """Update the service's recognition language. - - A convenience method for setting a single language. - - Args: - language: New language for recognition. - """ - logger.debug(f"Switching STT language to: {language}") - await self.set_languages([language]) - async def set_languages(self, languages: List[Language]): """Update the service's recognition languages. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(languages=...)`` + instead. + Args: languages: List of languages for recognition. First language is primary. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "set_languages() is deprecated. Use STTUpdateSettingsFrame with " + "GoogleSTTSettings(languages=...) instead.", + DeprecationWarning, + ) logger.debug(f"Switching STT languages to: {languages}") - self._settings["language_codes"] = [ - self.language_to_service_language(lang) for lang in languages - ] - # Recreate stream with new languages - await self._reconnect_if_needed() + await self._update_settings_from_typed(GoogleSTTSettings(languages=list(languages))) - async def set_model(self, model: str): - """Update the service's recognition model. 
+ async def _update_settings_from_typed(self, update: GoogleSTTSettings) -> set[str]: + """Apply typed settings update and reconnect if anything changed. + + Handles ``language`` from base ``set_language`` by converting it to + ``languages``. Emits a deprecation warning if ``language_codes`` is + used. All other fields (model, boolean flags) are applied directly. + Reconnects the stream on any change. Args: - model: The new recognition model to use. + update: A typed settings delta. + + Returns: + Set of field names whose values actually changed. """ - logger.debug(f"Switching STT model to: {model}") - await super().set_model(model) - self._settings["model"] = model - # Recreate stream with new model - await self._reconnect_if_needed() + from pipecat.services.settings import is_given + + # If base set_language sent a Language value, convert to languages list + if is_given(update.language): + update.languages = [update.language] + # Clear language so the base class doesn't try to store it + update.language = NOT_GIVEN + + # Warn on deprecated language_codes usage + if is_given(update.language_codes): + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "GoogleSTTSettings.language_codes is deprecated. " + "Use GoogleSTTSettings.languages (List[Language]) instead.", + DeprecationWarning, + stacklevel=2, + ) + + changed = await super()._update_settings_from_typed(update) + + if changed: + await self._reconnect_if_needed() + + return changed async def start(self, frame: StartFrame): """Start the STT service and establish connection. @@ -632,6 +712,10 @@ class GoogleSTTService(STTService): ) -> None: """Update service options dynamically. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(...)`` + instead. + Args: languages: New list of recognition languages. model: New recognition model. 
@@ -649,55 +733,42 @@ class GoogleSTTService(STTService): Changes that affect the streaming configuration will cause the stream to be reconnected. """ - # Update settings with new values + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "update_options() is deprecated. Use STTUpdateSettingsFrame with " + "GoogleSTTSettings(...) instead.", + DeprecationWarning, + ) + # Build a typed settings delta from the provided options + update = GoogleSTTSettings() + if languages is not None: - logger.debug(f"Updating language to: {languages}") - self._settings["language_codes"] = [ - self.language_to_service_language(lang) for lang in languages - ] - + update.languages = list(languages) if model is not None: - logger.debug(f"Updating model to: {model}") - self._settings["model"] = model - + update.model = model if enable_automatic_punctuation is not None: - logger.debug(f"Updating automatic punctuation to: {enable_automatic_punctuation}") - self._settings["enable_automatic_punctuation"] = enable_automatic_punctuation - + update.enable_automatic_punctuation = enable_automatic_punctuation if enable_spoken_punctuation is not None: - logger.debug(f"Updating spoken punctuation to: {enable_spoken_punctuation}") - self._settings["enable_spoken_punctuation"] = enable_spoken_punctuation - + update.enable_spoken_punctuation = enable_spoken_punctuation if enable_spoken_emojis is not None: - logger.debug(f"Updating spoken emojis to: {enable_spoken_emojis}") - self._settings["enable_spoken_emojis"] = enable_spoken_emojis - + update.enable_spoken_emojis = enable_spoken_emojis if profanity_filter is not None: - logger.debug(f"Updating profanity filter to: {profanity_filter}") - self._settings["profanity_filter"] = profanity_filter - + update.profanity_filter = profanity_filter if enable_word_time_offsets is not None: - logger.debug(f"Updating word time offsets to: {enable_word_time_offsets}") - self._settings["enable_word_time_offsets"] = 
enable_word_time_offsets - + update.enable_word_time_offsets = enable_word_time_offsets if enable_word_confidence is not None: - logger.debug(f"Updating word confidence to: {enable_word_confidence}") - self._settings["enable_word_confidence"] = enable_word_confidence - + update.enable_word_confidence = enable_word_confidence if enable_interim_results is not None: - logger.debug(f"Updating interim results to: {enable_interim_results}") - self._settings["enable_interim_results"] = enable_interim_results - + update.enable_interim_results = enable_interim_results if enable_voice_activity_events is not None: - logger.debug(f"Updating voice activity events to: {enable_voice_activity_events}") - self._settings["enable_voice_activity_events"] = enable_voice_activity_events + update.enable_voice_activity_events = enable_voice_activity_events if location is not None: logger.debug(f"Updating location to: {location}") self._location = location - # Reconnect the stream for updates - await self._reconnect_if_needed() + await self._update_settings_from_typed(update) async def _connect(self): """Initialize streaming recognition config and stream.""" @@ -714,20 +785,20 @@ class GoogleSTTService(STTService): sample_rate_hertz=self.sample_rate, audio_channel_count=1, ), - language_codes=self._settings["language_codes"], - model=self._settings["model"], + language_codes=self._get_language_codes(), + model=self._settings.model, features=cloud_speech.RecognitionFeatures( - enable_automatic_punctuation=self._settings["enable_automatic_punctuation"], - enable_spoken_punctuation=self._settings["enable_spoken_punctuation"], - enable_spoken_emojis=self._settings["enable_spoken_emojis"], - profanity_filter=self._settings["profanity_filter"], - enable_word_time_offsets=self._settings["enable_word_time_offsets"], - enable_word_confidence=self._settings["enable_word_confidence"], + enable_automatic_punctuation=self._settings.enable_automatic_punctuation, + 
enable_spoken_punctuation=self._settings.enable_spoken_punctuation, + enable_spoken_emojis=self._settings.enable_spoken_emojis, + profanity_filter=self._settings.profanity_filter, + enable_word_time_offsets=self._settings.enable_word_time_offsets, + enable_word_confidence=self._settings.enable_word_confidence, ), ), streaming_features=cloud_speech.StreamingRecognitionFeatures( - enable_voice_activity_events=self._settings["enable_voice_activity_events"], - interim_results=self._settings["enable_interim_results"], + enable_voice_activity_events=self._settings.enable_voice_activity_events, + interim_results=self._settings.enable_interim_results, ), ) @@ -857,7 +928,7 @@ class GoogleSTTService(STTService): if not transcript: continue - primary_language = self._settings["language_codes"][0] + primary_language = self._get_language_codes()[0] if result.is_final: self._last_transcript_was_final = True diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 4016286df..d015571d0 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -23,7 +23,8 @@ from pipecat.utils.tracing.service_decorators import traced_tts # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import Any, AsyncGenerator, List, Literal, Mapping, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, List, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -36,6 +37,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language @@ -474,6 +476,63 @@ def language_to_gemini_tts_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class 
GoogleHttpTTSSettings(TTSSettings): + """Typed settings for Google HTTP TTS service. + + Parameters: + pitch: Voice pitch adjustment (e.g., "+2st", "-50%"). + rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for + SSML prosody tags (non-Chirp voices). + speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices). + Range [0.25, 2.0]. + volume: Volume adjustment (e.g., "loud", "soft", "+6dB"). + emphasis: Emphasis level for the text. + language: Language for synthesis. Defaults to English. + gender: Voice gender preference. + google_style: Google-specific voice style. + """ + + pitch: str = field(default_factory=lambda: NOT_GIVEN) + rate: str = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) + volume: str = field(default_factory=lambda: NOT_GIVEN) + emphasis: str = field(default_factory=lambda: NOT_GIVEN) + language: str = field(default_factory=lambda: NOT_GIVEN) + gender: str = field(default_factory=lambda: NOT_GIVEN) + google_style: str = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class GoogleStreamTTSSettings(TTSSettings): + """Typed settings for Google streaming TTS service. + + Parameters: + language: Language for synthesis. Defaults to English. + speaking_rate: The speaking rate, in the range [0.25, 2.0]. + """ + + language: str = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class GeminiTTSSettings(TTSSettings): + """Typed settings for Gemini TTS service. + + Parameters: + language: Language for synthesis. Defaults to English. + prompt: Optional style instructions for how to synthesize the content. + multi_speaker: Whether to enable multi-speaker support. + speaker_configs: List of speaker configurations for multi-speaker mode. 
+ """ + + language: str = field(default_factory=lambda: NOT_GIVEN) + prompt: str = field(default_factory=lambda: NOT_GIVEN) + multi_speaker: bool = field(default_factory=lambda: NOT_GIVEN) + speaker_configs: List[dict] = field(default_factory=lambda: NOT_GIVEN) + + class GoogleHttpTTSService(TTSService): """Google Cloud Text-to-Speech HTTP service with SSML support. @@ -538,19 +597,19 @@ class GoogleHttpTTSService(TTSService): params = params or GoogleHttpTTSService.InputParams() self._location = location - self._settings = { - "pitch": params.pitch, - "rate": params.rate, - "speaking_rate": params.speaking_rate, - "volume": params.volume, - "emphasis": params.emphasis, - "language": self.language_to_service_language(params.language) + self._settings: GoogleHttpTTSSettings = GoogleHttpTTSSettings( + pitch=params.pitch, + rate=params.rate, + speaking_rate=params.speaking_rate, + volume=params.volume, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) if params.language else "en-US", - "gender": params.gender, - "google_style": params.google_style, - } - self.set_voice(voice_id) + gender=params.gender, + google_style=params.google_style, + ) + self._voice_id = voice_id self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -619,21 +678,20 @@ class GoogleHttpTTSService(TTSService): """ return language_to_google_tts_language(language) - async def _update_settings(self, settings: Mapping[str, Any]): - """Override to handle speaking_rate updates for Chirp/Journey voices. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Override to handle speaking_rate validation. Args: - settings: Dictionary of settings to update. Can include 'speaking_rate' (float) + update: Typed settings delta. Can include 'speaking_rate' (float). 
""" - if "speaking_rate" in settings: - rate_value = float(settings["speaking_rate"]) - if 0.25 <= rate_value <= 2.0: - self._settings["speaking_rate"] = rate_value - else: + if isinstance(update, GoogleHttpTTSSettings) and is_given(update.speaking_rate): + rate_value = float(update.speaking_rate) + if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. Must be between 0.25 and 2.0" ) - await super()._update_settings(settings) + update.speaking_rate = NOT_GIVEN + return await super()._update_settings_from_typed(update) def _construct_ssml(self, text: str) -> str: ssml = "" @@ -641,39 +699,39 @@ class GoogleHttpTTSService(TTSService): # Voice tag voice_attrs = [f"name='{self._voice_id}'"] - language = self._settings["language"] + language = self._settings.language voice_attrs.append(f"language='{language}'") - if self._settings["gender"]: - voice_attrs.append(f"gender='{self._settings['gender']}'") + if self._settings.gender: + voice_attrs.append(f"gender='{self._settings.gender}'") ssml += f"" # Prosody tag prosody_attrs = [] - if self._settings["pitch"]: - prosody_attrs.append(f"pitch='{self._settings['pitch']}'") - if self._settings["rate"]: - prosody_attrs.append(f"rate='{self._settings['rate']}'") - if self._settings["volume"]: - prosody_attrs.append(f"volume='{self._settings['volume']}'") + if self._settings.pitch: + prosody_attrs.append(f"pitch='{self._settings.pitch}'") + if self._settings.rate: + prosody_attrs.append(f"rate='{self._settings.rate}'") + if self._settings.volume: + prosody_attrs.append(f"volume='{self._settings.volume}'") if prosody_attrs: ssml += f"" # Emphasis tag - if self._settings["emphasis"]: - ssml += f"" + if self._settings.emphasis: + ssml += f"" # Google style tag - if self._settings["google_style"]: - ssml += f"" + if self._settings.google_style: + ssml += f"" ssml += text # Close tags - if self._settings["google_style"]: + if self._settings.google_style: ssml += "" - if 
self._settings["emphasis"]: + if self._settings.emphasis: ssml += "" if prosody_attrs: ssml += "" @@ -710,7 +768,7 @@ class GoogleHttpTTSService(TTSService): synthesis_input = texttospeech_v1.SynthesisInput(ssml=ssml) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], name=self._voice_id + language_code=self._settings.language, name=self._voice_id ) # Build audio config with conditional speaking_rate audio_config_params = { @@ -719,8 +777,8 @@ class GoogleHttpTTSService(TTSService): } # For Chirp and Journey voices, include speaking_rate in AudioConfig - if (is_chirp_voice or is_journey_voice) and self._settings["speaking_rate"] is not None: - audio_config_params["speaking_rate"] = self._settings["speaking_rate"] + if (is_chirp_voice or is_journey_voice) and self._settings.speaking_rate is not None: + audio_config_params["speaking_rate"] = self._settings.speaking_rate audio_config = texttospeech_v1.AudioConfig(**audio_config_params) @@ -950,33 +1008,32 @@ class GoogleTTSService(GoogleBaseTTSService): params = params or GoogleTTSService.InputParams() self._location = location - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: GoogleStreamTTSSettings = GoogleStreamTTSSettings( + language=self.language_to_service_language(params.language) if params.language else "en-US", - "speaking_rate": params.speaking_rate, - } - self.set_voice(voice_id) + speaking_rate=params.speaking_rate, + ) + self._voice_id = voice_id self._voice_cloning_key = voice_cloning_key self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) - async def _update_settings(self, settings: Mapping[str, Any]): - """Override to handle speaking_rate updates for streaming API. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Override to handle speaking_rate validation. Args: - settings: Dictionary of settings to update. 
Can include 'speaking_rate' (float) + update: Typed settings delta. Can include 'speaking_rate' (float). """ - if "speaking_rate" in settings: - rate_value = float(settings["speaking_rate"]) - if 0.25 <= rate_value <= 2.0: - self._settings["speaking_rate"] = rate_value - else: + if isinstance(update, GoogleStreamTTSSettings) and is_given(update.speaking_rate): + rate_value = float(update.speaking_rate) + if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. Must be between 0.25 and 2.0" ) - await super()._update_settings(settings) + update.speaking_rate = NOT_GIVEN + return await super()._update_settings_from_typed(update) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1000,11 +1057,11 @@ class GoogleTTSService(GoogleBaseTTSService): voice_cloning_key=self._voice_cloning_key ) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], voice_clone=voice_clone_params + language_code=self._settings.language, voice_clone=voice_clone_params ) else: voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], name=self._voice_id + language_code=self._settings.language, name=self._voice_id ) # Create streaming config @@ -1013,7 +1070,7 @@ class GoogleTTSService(GoogleBaseTTSService): streaming_audio_config=texttospeech_v1.StreamingAudioConfig( audio_encoding=texttospeech_v1.AudioEncoding.PCM, sample_rate_hertz=self.sample_rate, - speaking_rate=self._settings["speaking_rate"], + speaking_rate=self._settings.speaking_rate, ), ) @@ -1159,14 +1216,14 @@ class GeminiTTSService(GoogleBaseTTSService): self._location = location self._model = model self._voice_id = voice_id - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: GeminiTTSSettings = GeminiTTSSettings( + language=self.language_to_service_language(params.language) if params.language else "en-US", - "prompt": 
params.prompt, - "multi_speaker": params.multi_speaker, - "speaker_configs": params.speaker_configs, - } + prompt=params.prompt, + multi_speaker=params.multi_speaker, + speaker_configs=params.speaker_configs, + ) self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path @@ -1183,7 +1240,7 @@ class GeminiTTSService(GoogleBaseTTSService): """ return language_to_gemini_tts_language(language) - def set_voice(self, voice_id: str): + async def set_voice(self, voice_id: str): """Set the voice for TTS generation. Args: @@ -1206,15 +1263,13 @@ class GeminiTTSService(GoogleBaseTTSService): f"Current rate of {self.sample_rate}Hz may cause issues." ) - async def _update_settings(self, settings: Mapping[str, Any]): + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: """Override to handle prompt updates. Args: - settings: Dictionary of settings to update. Can include 'prompt' (str) + update: Typed settings delta. Can include 'prompt' (str). 
""" - if "prompt" in settings: - self._settings["prompt"] = settings["prompt"] - await super()._update_settings(settings) + return await super()._update_settings_from_typed(update) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1234,10 +1289,10 @@ class GeminiTTSService(GoogleBaseTTSService): await self.start_ttfb_metrics() # Build voice selection params - if self._settings["multi_speaker"] and self._settings["speaker_configs"]: + if self._settings.multi_speaker and self._settings.speaker_configs: # Multi-speaker mode speaker_voice_configs = [] - for speaker_config in self._settings["speaker_configs"]: + for speaker_config in self._settings.speaker_configs: speaker_voice_configs.append( texttospeech_v1.MultispeakerPrebuiltVoice( speaker_alias=speaker_config["speaker_alias"], @@ -1250,14 +1305,14 @@ class GeminiTTSService(GoogleBaseTTSService): ) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], + language_code=self._settings.language, model_name=self._model, multi_speaker_voice_config=multi_speaker_voice_config, ) else: # Single speaker mode voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], + language_code=self._settings.language, name=self._voice_id, model_name=self._model, ) @@ -1273,7 +1328,7 @@ class GeminiTTSService(GoogleBaseTTSService): # Use base class streaming logic with prompt support async for frame in self._stream_tts( - streaming_config, text, context_id, self._settings["prompt"] + streaming_config, text, context_id, self._settings.prompt ): yield frame diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 7433c2549..2bad8cf30 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -12,6 +12,7 @@ WebSocket API for streaming audio transcription. 
import base64 import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -64,6 +66,18 @@ def language_to_gradium_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class GradiumSTTSettings(STTSettings): + """Typed settings for the Gradium STT service. + + Parameters: + delay_in_frames: Delay in audio frames (80ms each) before text is + generated. Higher delays allow more context but increase latency. + """ + + delay_in_frames: int = field(default_factory=lambda: NOT_GIVEN) + + class GradiumSTTService(WebsocketSTTService): """Gradium real-time speech-to-text service. @@ -127,9 +141,15 @@ class GradiumSTTService(WebsocketSTTService): self._api_key = api_key self._api_endpoint_base_url = api_endpoint_base_url self._websocket = None - self._params = params or GradiumSTTService.InputParams() self._json_config = json_config + params = params or GradiumSTTService.InputParams() + + self._settings: GradiumSTTSettings = GradiumSTTSettings( + language=params.language, + delay_in_frames=params.delay_in_frames if params.delay_in_frames else NOT_GIVEN, + ) + self._receive_task = None self._audio_buffer = bytearray() @@ -149,16 +169,22 @@ class GradiumSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the recognition language and reconnect. 
+ async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, sync params, and reconnect. Args: - language: The language to use for speech recognition. + update: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. + + Returns: + Set of field names whose values actually changed. """ - logger.info(f"Switching STT language to: [{language}]") - self._params.language = language + changed = await super()._update_settings_from_typed(update) + if not changed: + return changed + await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): """Start the speech-to-text service. @@ -298,12 +324,12 @@ class GradiumSTTService(WebsocketSTTService): json_config = {} if self._json_config: json_config = json.loads(self._json_config) - if self._params.language: - gradium_language = language_to_gradium_language(self._params.language) + if is_given(self._settings.language) and self._settings.language: + gradium_language = language_to_gradium_language(self._settings.language) if gradium_language: json_config["language"] = gradium_language - if self._params.delay_in_frames: - json_config["delay_in_frames"] = self._params.delay_in_frames + if is_given(self._settings.delay_in_frames) and self._settings.delay_in_frames: + json_config["delay_in_frames"] = self._settings.delay_in_frames if json_config: setup_msg["json_config"] = json_config await self._websocket.send(json.dumps(setup_msg)) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 0e9865cf0..e129fba68 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -6,7 +6,8 @@ import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -22,6 +23,7 @@ from pipecat.frames.frames import ( 
TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import InterruptibleWordTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -37,6 +39,17 @@ except ModuleNotFoundError as e: SAMPLE_RATE = 48000 +@dataclass +class GradiumTTSSettings(TTSSettings): + """Typed settings for the Gradium TTS service. + + Parameters: + output_format: Audio output format. + """ + + output_format: str = field(default_factory=lambda: NOT_GIVEN) + + class GradiumTTSService(InterruptibleWordTTSService): """Text-to-Speech service using Gradium's websocket API.""" @@ -86,12 +99,11 @@ class GradiumTTSService(InterruptibleWordTTSService): self._url = url self._voice_id = voice_id self._json_config = json_config - self._model = model - self._settings = { - "voice_id": voice_id, - "model_name": model, - "output_format": "pcm", - } + self._settings: GradiumTTSSettings = GradiumTTSSettings( + model=model, + voice=voice_id, + output_format="pcm", + ) # State tracking self._receive_task = None @@ -105,24 +117,21 @@ class GradiumTTSService(InterruptibleWordTTSService): """ return True - async def set_model(self, model: str): - """Update the TTS model. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and reconnect if voice changed. Args: - model: The model name to use for synthesis. - """ - self._model = model - await super().set_model(model) + update: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice changed.""" + Returns: + Set of field names whose values actually changed. 
+ """ prev_voice = self._voice_id - await super()._update_settings(settings) - if not prev_voice == self._voice_id: - self._settings["voice_id"] = self._voice_id - logger.info(f"Switching TTS voice to: [{self._voice_id}]") + changed = await super()._update_settings_from_typed(update) + if self._voice_id != prev_voice: await self._disconnect() await self._connect() + return changed def _build_msg(self, text: str = "") -> dict: """Build JSON message for Gradium API.""" diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index e1355ce31..7cb619a7d 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -13,8 +13,8 @@ https://docs.x.ai/docs/guides/voice/agent import base64 import json import time -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional from loguru import logger @@ -56,6 +56,7 @@ from pipecat.processors.aggregators.llm_response_universal import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.utils.time import time_now_iso8601 from . import events @@ -85,6 +86,17 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class GrokRealtimeLLMSettings(LLMSettings): + """Typed settings for Grok Realtime LLM services. + + Parameters: + session_properties: Grok Realtime session configuration. + """ + + session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + + class GrokRealtimeLLMService(LLMService): """Grok Realtime Voice Agent LLM service providing real-time audio and text communication. 
@@ -134,9 +146,8 @@ class GrokRealtimeLLMService(LLMService): self.api_key = api_key self.base_url = base_url - # Initialize session_properties - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() + self._settings = GrokRealtimeLLMSettings( + session_properties=session_properties or events.SessionProperties(), ) self._audio_input_paused = start_audio_paused @@ -186,13 +197,13 @@ class GrokRealtimeLLMService(LLMService): Configured sample rate or None if not manually configured. For PCMU/PCMA formats, returns 8000 Hz (G.711 standard). """ - if not self._session_properties.audio: + if not self._settings.session_properties.audio: return None audio_config = ( - self._session_properties.audio.input + self._settings.session_properties.audio.input if direction == "input" - else self._session_properties.audio.output + else self._settings.session_properties.audio.output ) if audio_config and audio_config.format: @@ -222,8 +233,8 @@ class GrokRealtimeLLMService(LLMService): def _is_turn_detection_enabled(self) -> bool: """Check if server-side VAD is enabled.""" - if self._session_properties.turn_detection: - return self._session_properties.turn_detection.type == "server_vad" + if self._settings.session_properties.turn_detection: + return self._settings.session_properties.turn_detection.type == "server_vad" return False async def _handle_interruption(self): @@ -290,18 +301,18 @@ class GrokRealtimeLLMService(LLMService): await super().start(frame) # Ensure audio configuration exists with both input and output - if not self._session_properties.audio: - self._session_properties.audio = events.AudioConfiguration() + if not self._settings.session_properties.audio: + self._settings.session_properties.audio = events.AudioConfiguration() # Fill in missing input configuration - if not self._session_properties.audio.input: - self._session_properties.audio.input = events.AudioInput( + if not 
self._settings.session_properties.audio.input: + self._settings.session_properties.audio.input = events.AudioInput( format=events.PCMAudioFormat(rate=frame.audio_in_sample_rate) ) # Fill in missing output configuration - if not self._session_properties.audio.output: - self._session_properties.audio.output = events.AudioOutput( + if not self._settings.session_properties.audio.output: + self._settings.session_properties.audio.output = events.AudioOutput( format=events.PCMAudioFormat(rate=frame.audio_out_sample_rate) ) @@ -336,6 +347,16 @@ class GrokRealtimeLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Legacy dict path: frame.settings contains SessionProperties fields, + # not our Settings fields, so we construct SessionProperties directly. + # The new typed path (frame.update) falls through to super, which calls + # _update_settings_from_typed → our override handles the rest. + if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._update_settings() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -355,9 +376,6 @@ class GrokRealtimeLLMService(LLMService): await self._handle_bot_stopped_speaking() elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): await self._update_settings() @@ -436,9 +454,16 @@ class GrokRealtimeLLMService(LLMService): return await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) + async def _update_settings_from_typed(self, update): + """Apply a typed settings update, sending a session update if needed.""" + changed = 
await super()._update_settings_from_typed(update) + if "session_properties" in changed: + await self._update_settings() + return changed + async def _update_settings(self): """Update session settings on the server.""" - settings = self._session_properties + settings = self._settings.session_properties adapter: GrokRealtimeLLMAdapter = self.get_llm_adapter() if self._context: diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 331af8eb7..678a2426d 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -8,6 +8,7 @@ import io import wave +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -20,6 +21,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -32,6 +34,21 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class GroqTTSSettings(TTSSettings): + """Typed settings for the Groq TTS service. + + Parameters: + output_format: Audio output format. + speed: Speech speed multiplier. Defaults to 1.0. + groq_sample_rate: Audio sample rate. + """ + + output_format: str = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + groq_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + + class GroqTTSService(TTSService): """Groq text-to-speech service implementation. 
@@ -92,14 +109,14 @@ class GroqTTSService(TTSService): self._voice_id = voice_id self._params = params - self._settings = { - "model": model_name, - "voice_id": voice_id, - "output_format": output_format, - "language": str(params.language) if params.language else "en", - "speed": params.speed, - "sample_rate": sample_rate, - } + self._settings: GroqTTSSettings = GroqTTSSettings( + model=model_name, + voice=voice_id, + language=str(params.language) if params.language else "en", + output_format=output_format, + speed=params.speed, + groq_sample_rate=sample_rate, + ) self._client = AsyncGroq(api_key=self._api_key) diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index defdc355d..b0e3beead 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -8,6 +8,7 @@ import base64 import os +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -18,6 +19,7 @@ from pipecat.frames.frames import ( Frame, TranscriptionFrame, ) +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import HATHORA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language @@ -27,6 +29,19 @@ from pipecat.utils.tracing.service_decorators import traced_stt from .utils import ConfigOption +@dataclass +class HathoraSTTSettings(STTSettings): + """Typed settings for the Hathora STT service. + + Parameters: + config: Some models support additional config, refer to + `docs <https://models.hathora.dev>`_ for each model to see + what is supported. + """ + + config: Optional[list] = field(default_factory=lambda: NOT_GIVEN) + + class HathoraSTTService(SegmentedSTTService): """This service supports several different speech-to-text models hosted by Hathora. 
@@ -83,10 +98,11 @@ class HathoraSTTService(SegmentedSTTService): params = params or HathoraSTTService.InputParams() - self._settings = { - "language": params.language, - "config": params.config, - } + self._settings: HathoraSTTSettings = HathoraSTTSettings( + model=model, + language=params.language, + config=params.config, + ) self.set_model_name(model) @@ -123,12 +139,11 @@ class HathoraSTTService(SegmentedSTTService): "model": self._model, } - if self._settings["language"] is not None: - payload["language"] = self._settings["language"] - if self._settings["config"] is not None: + if self._settings.language is not None: + payload["language"] = self._settings.language + if self._settings.config is not None: payload["model_config"] = [ - {"name": option.name, "value": option.value} - for option in self._settings["config"] + {"name": option.name, "value": option.value} for option in self._settings.config ] base64_audio = base64.b64encode(audio).decode("utf-8") @@ -147,7 +162,7 @@ class HathoraSTTService(SegmentedSTTService): if text: # Only yield non-empty text # Hathora's API currently doesn't return language info # so we default to the requested language or "en" - response_language = self._settings["language"] or "en" + response_language = self._settings.language or "en" await self._handle_transcription(text, True, response_language) yield TranscriptionFrame( text, diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 80cbd4fe8..b821b1e05 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -9,6 +9,7 @@ import io import os import wave +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional, Tuple import aiohttp @@ -21,6 +22,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators 
import traced_tts @@ -45,6 +47,21 @@ def _decode_audio_payload( return audio_bytes, fallback_sample_rate, fallback_channels +@dataclass +class HathoraTTSSettings(TTSSettings): + """Typed settings for Hathora TTS service. + + Parameters: + speed: Speech speed multiplier (if supported by model). + config: Some models support additional config, refer to + [docs](https://models.hathora.dev) for each model to see + what is supported. + """ + + speed: float = field(default_factory=lambda: NOT_GIVEN) + config: list = field(default_factory=lambda: NOT_GIVEN) + + class HathoraTTSService(TTSService): """This service supports several different text-to-speech models hosted by Hathora. @@ -98,13 +115,15 @@ class HathoraTTSService(TTSService): params = params or HathoraTTSService.InputParams() - self._settings = { - "speed": params.speed, - "config": params.config, - } + self._settings: HathoraTTSSettings = HathoraTTSSettings( + model=model, + voice=voice_id, + speed=params.speed, + config=params.config, + ) self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -135,12 +154,11 @@ class HathoraTTSService(TTSService): if self._voice_id is not None: payload["voice"] = self._voice_id - if self._settings["speed"] is not None: - payload["speed"] = self._settings["speed"] - if self._settings["config"] is not None: + if self._settings.speed is not None: + payload["speed"] = self._settings.speed + if self._settings.config is not None: payload["model_config"] = [ - {"name": option.name, "value": option.value} - for option in self._settings["config"] + {"name": option.name, "value": option.value} for option in self._settings.config ] yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 2d98e1f8c..3b45cc249 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -117,7 +117,7 @@ class HumeTTSService(WordTTSService): self._params = params or HumeTTSService.InputParams() # Store voice in the base class (mirrors other services) - self.set_voice(voice_id) + self._voice_id = voice_id self._audio_bytes = b"" @@ -196,7 +196,7 @@ class HumeTTSService(WordTTSService): key_l = (key or "").lower() if key_l == "voice_id": - self.set_voice(str(value)) + await self.set_voice(str(value)) logger.debug(f"HumeTTSService voice_id set to: {self.voice}") elif key_l == "description": self._params.description = None if value is None else str(value) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 2ea94399b..68c140187 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -16,6 +16,7 @@ Inworld’s text-to-speech (TTS) models offer ultra-realistic, context-aware spe import asyncio import base64 import json +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple import aiohttp @@ -23,6 +24,8 @@ import websockets from loguru import logger from pydantic import BaseModel +from pipecat.services.settings import 
NOT_GIVEN, TTSSettings, is_given + try: from websockets.asyncio.client import connect as websocket_connect from websockets.protocol import State @@ -47,6 +50,31 @@ from pipecat.services.tts_service import AudioContextWordTTSService, WordTTSServ from pipecat.utils.tracing.service_decorators import traced_tts +@dataclass +class InworldTTSSettings(TTSSettings): + """Typed settings for Inworld TTS services. + + Parameters: + audio_encoding: Audio encoding format (e.g. LINEAR16). + audio_sample_rate: Audio sample rate in Hz. + speaking_rate: Speaking rate for speech synthesis. + temperature: Temperature for speech synthesis. + auto_mode: Whether to use auto mode. Recommended when texts are sent + in full sentences/phrases. When enabled, the server controls + flushing of buffered text to achieve minimal latency while + maintaining high quality audio output. If None (default), + automatically set based on aggregate_sentences. + apply_text_normalization: Whether to apply text normalization. + """ + + audio_encoding: str = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) + temperature: float = field(default_factory=lambda: NOT_GIVEN) + auto_mode: bool = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + + class InworldHttpTTSService(WordTTSService): """Inworld AI HTTP-based TTS service. 
@@ -110,23 +138,21 @@ class InworldHttpTTSService(WordTTSService): else: self._base_url = "https://api.inworld.ai/tts/v1/voice" - self._settings = { - "voiceId": voice_id, - "modelId": model, - "audioConfig": { - "audioEncoding": encoding, - "sampleRateHertz": 0, - }, - } + self._settings: InworldTTSSettings = InworldTTSSettings( + model=model, + voice=voice_id, + audio_encoding=encoding, + audio_sample_rate=0, + ) if params.temperature is not None: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature if params.speaking_rate is not None: - self._settings["audioConfig"]["speakingRate"] = params.speaking_rate + self._settings.speaking_rate = params.speaking_rate self._cumulative_time = 0.0 - self.set_voice(voice_id) + self._voice_id = voice_id self.set_model_name(model) def can_generate_metrics(self) -> bool: @@ -144,7 +170,7 @@ class InworldHttpTTSService(WordTTSService): frame: The start frame. """ await super().start(frame) - self._settings["audioConfig"]["sampleRateHertz"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate async def stop(self, frame: EndFrame): """Stop the Inworld TTS service. 
@@ -223,15 +249,22 @@ class InworldHttpTTSService(WordTTSService): """ logger.debug(f"{self}: Generating TTS [{text}] (streaming={self._streaming})") + audio_config = { + "audioEncoding": self._settings.audio_encoding, + "sampleRateHertz": self._settings.audio_sample_rate, + } + if is_given(self._settings.speaking_rate): + audio_config["speakingRate"] = self._settings.speaking_rate + payload = { "text": text, - "voiceId": self._settings["voiceId"], - "modelId": self._settings["modelId"], - "audioConfig": self._settings["audioConfig"], + "voiceId": self._settings.voice, + "modelId": self._settings.model, + "audioConfig": audio_config, } - if "temperature" in self._settings: - payload["temperature"] = self._settings["temperature"] + if is_given(self._settings.temperature): + payload["temperature"] = self._settings.temperature # Use WORD timestamps for simplicity and correct spacing/capitalization payload["timestampType"] = self._timestamp_type @@ -470,27 +503,25 @@ class InworldTTSService(AudioContextWordTTSService): self._api_key = api_key self._url = url - self._settings: Dict[str, Any] = { - "voiceId": voice_id, - "modelId": model, - "audioConfig": { - "audioEncoding": encoding, - "sampleRateHertz": 0, - }, - } + self._settings: InworldTTSSettings = InworldTTSSettings( + model=model, + voice=voice_id, + audio_encoding=encoding, + audio_sample_rate=0, + ) self._timestamp_type = "WORD" if params.temperature is not None: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature if params.speaking_rate is not None: - self._settings["audioConfig"]["speakingRate"] = params.speaking_rate + self._settings.speaking_rate = params.speaking_rate if params.apply_text_normalization is not None: - self._settings["applyTextNormalization"] = params.apply_text_normalization + self._settings.apply_text_normalization = params.apply_text_normalization if params.auto_mode is not None: - self._settings["autoMode"] = params.auto_mode + 
self._settings.auto_mode = params.auto_mode else: - self._settings["autoMode"] = aggregate_sentences + self._settings.auto_mode = aggregate_sentences self._buffer_settings = { "maxBufferDelayMs": params.max_buffer_delay_ms, @@ -509,7 +540,7 @@ class InworldTTSService(AudioContextWordTTSService): # Track the end time of the last word in the current generation self._generation_end_time = 0.0 - self.set_voice(voice_id) + self._voice_id = voice_id self.set_model_name(model) def can_generate_metrics(self) -> bool: @@ -527,7 +558,7 @@ class InworldTTSService(AudioContextWordTTSService): frame: The start frame. """ await super().start(frame) - self._settings["audioConfig"]["sampleRateHertz"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -859,18 +890,25 @@ class InworldTTSService(AudioContextWordTTSService): Args: context_id: The context ID. """ + audio_config = { + "audioEncoding": self._settings.audio_encoding, + "sampleRateHertz": self._settings.audio_sample_rate, + } + if is_given(self._settings.speaking_rate): + audio_config["speakingRate"] = self._settings.speaking_rate + create_config: Dict[str, Any] = { - "voiceId": self._settings["voiceId"], - "modelId": self._settings["modelId"], - "audioConfig": self._settings["audioConfig"], + "voiceId": self._settings.voice, + "modelId": self._settings.model, + "audioConfig": audio_config, } - if "temperature" in self._settings: - create_config["temperature"] = self._settings["temperature"] - if "applyTextNormalization" in self._settings: - create_config["applyTextNormalization"] = self._settings["applyTextNormalization"] - if "autoMode" in self._settings: - create_config["autoMode"] = self._settings["autoMode"] + if is_given(self._settings.temperature): + create_config["temperature"] = self._settings.temperature + if is_given(self._settings.apply_text_normalization): + create_config["applyTextNormalization"] = 
self._settings.apply_text_normalization + if is_given(self._settings.auto_mode): + create_config["autoMode"] = self._settings.auto_mode # Set buffer settings for timely audio generation. # Use provided values or defaults that work well for streaming LLM output. diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 49ede2409..242446de9 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -7,6 +7,7 @@ """Kokoro TTS service implementation using kokoro-onnx.""" import os +from dataclasses import dataclass, field from pathlib import Path from typing import AsyncGenerator, Optional @@ -22,6 +23,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -87,6 +89,17 @@ def language_to_kokoro_language(language: Language) -> str: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class KokoroTTSSettings(TTSSettings): + """Typed settings for the Kokoro TTS service. + + Parameters: + lang_code: Kokoro language code for synthesis. + """ + + lang_code: str = field(default_factory=lambda: NOT_GIVEN) + + class KokoroTTSService(TTSService): """Kokoro TTS service implementation. 
@@ -129,6 +142,12 @@ class KokoroTTSService(TTSService): self._voice_id = voice_id self._lang_code = language_to_kokoro_language(params.language) + self._settings: KokoroTTSSettings = KokoroTTSSettings( + voice=voice_id, + language=language_to_kokoro_language(params.language), + lang_code=language_to_kokoro_language(params.language), + ) + model = Path(model_path) if model_path else KOKORO_CACHE_DIR / "kokoro-v1.0.onnx" voices = Path(voices_path) if voices_path else KOKORO_CACHE_DIR / "voices-v1.0.bin" diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index af7e691b0..77af50f15 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -44,6 +44,7 @@ from pipecat.frames.frames import ( LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, + LLMUpdateSettingsFrame, StartFrame, UserImageRequestFrame, ) @@ -58,6 +59,7 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import ServiceSettings from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationUtil, @@ -351,6 +353,17 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._handle_interruptions(frame) elif isinstance(frame, LLMConfigureOutputFrame): self._skip_tts = frame.skip_tts + elif isinstance(frame, LLMUpdateSettingsFrame): + # New path: typed settings update object. + if frame.update is not None: + await self._update_settings_from_typed(frame.update) + # Legacy path: plain dict, but service uses typed settings — convert. 
+ elif isinstance(self._settings, ServiceSettings): + update = type(self._settings).from_mapping(frame.settings) + await self._update_settings_from_typed(update) + # Legacy path: plain dict, service still uses dict-based settings. + else: + await self._update_settings(frame.settings) elif isinstance(frame, LLMContextSummaryRequestFrame): await self._handle_summary_request(frame) diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 4c34e28d5..97569fa1d 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -7,6 +7,7 @@ """LMNT text-to-speech service implementation.""" import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -23,6 +24,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -71,6 +73,17 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class LmntTTSSettings(TTSSettings): + """Typed settings for LMNT TTS service. + + Parameters: + format: Audio output format. Defaults to "raw". + """ + + format: str = field(default_factory=lambda: NOT_GIVEN) + + class LmntTTSService(InterruptibleTTSService): """LMNT real-time text-to-speech service. 
@@ -107,12 +120,14 @@ class LmntTTSService(InterruptibleTTSService): ) self._api_key = api_key - self.set_voice(voice_id) + self._voice_id = voice_id self.set_model_name(model) - self._settings = { - "language": self.language_to_service_language(language), - "format": "raw", # Use raw format for direct PCM data - } + self._settings: LmntTTSSettings = LmntTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(language), + format="raw", + ) self._receive_task = None self._context_id: Optional[str] = None @@ -202,9 +217,9 @@ class LmntTTSService(InterruptibleTTSService): init_msg = { "X-API-Key": self._api_key, "voice": self._voice_id, - "format": self._settings["format"], + "format": self._settings.format, "sample_rate": self.sample_rate, - "language": self._settings["language"], + "language": self._settings.language, "model": self.model_name, } diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 7284d9630..6ce3e4b45 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -11,6 +11,7 @@ for streaming text-to-speech synthesis. """ import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -25,6 +26,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -85,6 +87,40 @@ def language_to_minimax_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class MiniMaxTTSSettings(TTSSettings): + """Typed settings for MiniMax TTS service. + + Parameters: + stream: Whether to use streaming mode. + speed: Speech speed (range: 0.5 to 2.0). 
+ volume: Speech volume (range: 0 to 10). + pitch: Pitch adjustment (range: -12 to 12). + emotion: Emotional tone (options: "happy", "sad", "angry", "fearful", + "disgusted", "surprised", "calm", "fluent"). + text_normalization: Enable text normalization (Chinese/English). + latex_read: Enable LaTeX formula reading. + audio_bitrate: Audio bitrate in bps. + audio_format: Audio output format. + audio_channel: Number of audio channels. + audio_sample_rate: Audio sample rate in Hz. + language_boost: Language boost string for multilingual support. + """ + + stream: bool = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + volume: float = field(default_factory=lambda: NOT_GIVEN) + pitch: int = field(default_factory=lambda: NOT_GIVEN) + emotion: str = field(default_factory=lambda: NOT_GIVEN) + text_normalization: bool = field(default_factory=lambda: NOT_GIVEN) + latex_read: bool = field(default_factory=lambda: NOT_GIVEN) + audio_bitrate: int = field(default_factory=lambda: NOT_GIVEN) + audio_format: str = field(default_factory=lambda: NOT_GIVEN) + audio_channel: int = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + language_boost: str = field(default_factory=lambda: NOT_GIVEN) + + class MiniMaxHttpTTSService(TTSService): """Text-to-speech service using MiniMax's T2A (Text-to-Audio) API. 
@@ -172,29 +208,27 @@ class MiniMaxHttpTTSService(TTSService): self._voice_id = voice_id # Create voice settings - self._settings = { - "stream": True, - "voice_setting": { - "speed": params.speed, - "vol": params.volume, - "pitch": params.pitch, - }, - "audio_setting": { - "bitrate": 128000, - "format": "pcm", - "channel": 1, - }, - } + self._settings: MiniMaxTTSSettings = MiniMaxTTSSettings( + model=model, + voice=voice_id, + stream=True, + speed=params.speed, + volume=params.volume, + pitch=params.pitch, + audio_bitrate=128000, + audio_format="pcm", + audio_channel=1, + ) # Set voice and model - self.set_voice(voice_id) + self._voice_id = voice_id self.set_model_name(model) # Add language boost if provided if params.language: service_lang = self.language_to_service_language(params.language) if service_lang: - self._settings["language_boost"] = service_lang + self._settings.language_boost = service_lang # Add optional emotion if provided if params.emotion: @@ -210,7 +244,7 @@ class MiniMaxHttpTTSService(TTSService): "fluent", ] if params.emotion in supported_emotions: - self._settings["voice_setting"]["emotion"] = params.emotion + self._settings.emotion = params.emotion else: logger.warning( f"Unsupported emotion: {params.emotion}. Supported emotions: {supported_emotions}" @@ -226,15 +260,15 @@ class MiniMaxHttpTTSService(TTSService): "Parameter `english_normalization` is deprecated and will be removed in a future version. 
Use `text_normalization` instead.", DeprecationWarning, ) - self._settings["voice_setting"]["text_normalization"] = params.english_normalization + self._settings.text_normalization = params.english_normalization # Add text_normalization if provided (corrected parameter name) if params.text_normalization is not None: - self._settings["voice_setting"]["text_normalization"] = params.text_normalization + self._settings.text_normalization = params.text_normalization # Add latex_read if provided if params.latex_read is not None: - self._settings["voice_setting"]["latex_read"] = params.latex_read + self._settings.latex_read = params.latex_read def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -263,16 +297,6 @@ class MiniMaxHttpTTSService(TTSService): """ self._model_name = model - def set_voice(self, voice: str): - """Set the voice to use. - - Args: - voice: The voice identifier to use for synthesis. - """ - self._voice_id = voice - if "voice_setting" in self._settings: - self._settings["voice_setting"]["voice_id"] = voice - async def start(self, frame: StartFrame): """Start the MiniMax TTS service. @@ -280,7 +304,7 @@ class MiniMaxHttpTTSService(TTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["audio_setting"]["sample_rate"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate logger.debug(f"MiniMax TTS initialized with sample_rate: {self.sample_rate}") @traced_tts @@ -302,10 +326,38 @@ class MiniMaxHttpTTSService(TTSService): "Authorization": f"Bearer {self._api_key}", } + # Build voice_setting dict for API + voice_setting = { + "voice_id": self._voice_id, + "speed": self._settings.speed, + "vol": self._settings.volume, + "pitch": self._settings.pitch, + } + if is_given(self._settings.emotion): + voice_setting["emotion"] = self._settings.emotion + if is_given(self._settings.text_normalization): + voice_setting["text_normalization"] = self._settings.text_normalization + if is_given(self._settings.latex_read): + voice_setting["latex_read"] = self._settings.latex_read + + # Build audio_setting dict for API + audio_setting = { + "bitrate": self._settings.audio_bitrate, + "format": self._settings.audio_format, + "channel": self._settings.audio_channel, + "sample_rate": self._settings.audio_sample_rate, + } + # Create payload from settings - payload = self._settings.copy() - payload["model"] = self._model_name - payload["text"] = text + payload = { + "stream": self._settings.stream, + "voice_setting": voice_setting, + "audio_setting": audio_setting, + "model": self._model_name, + "text": text, + } + if is_given(self._settings.language_boost): + payload["language_boost"] = self._settings.language_boost try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/mistral/llm.py b/src/pipecat/services/mistral/llm.py index 54361ef28..7a8f5b71a 100644 --- a/src/pipecat/services/mistral/llm.py +++ b/src/pipecat/services/mistral/llm.py @@ -185,19 +185,19 @@ class MistralLLMService(OpenAILLMService): "messages": fixed_messages, "tools": params_from_context["tools"], "tool_choice": params_from_context["tool_choice"], - "frequency_penalty": self._settings["frequency_penalty"], - 
"presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Handle Mistral-specific parameter mapping # Mistral uses "random_seed" instead of "seed" - if self._settings["seed"]: - params["random_seed"] = self._settings["seed"] + if self._settings.seed: + params["random_seed"] = self._settings.seed # Add any extra parameters - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 24eb05bd3..b7019e6d6 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -13,7 +13,8 @@ text-to-speech API for real-time audio synthesis. import asyncio import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Optional import aiohttp from loguru import logger @@ -34,6 +35,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -72,6 +74,23 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class NeuphonicTTSSettings(TTSSettings): + """Typed settings for Neuphonic TTS service. 
+ + Parameters: + lang_code: Neuphonic language code. + speed: Speech speed multiplier. Defaults to 1.0. + encoding: Audio encoding format. + sampling_rate: Audio sample rate. + """ + + lang_code: str = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + encoding: str = field(default_factory=lambda: NOT_GIVEN) + sampling_rate: int = field(default_factory=lambda: NOT_GIVEN) + + class NeuphonicTTSService(InterruptibleTTSService): """Neuphonic real-time text-to-speech service using WebSocket streaming. @@ -127,13 +146,13 @@ class NeuphonicTTSService(InterruptibleTTSService): self._api_key = api_key self._url = url - self._settings = { - "lang_code": self.language_to_service_language(params.language), - "speed": params.speed, - "encoding": encoding, - "sampling_rate": sample_rate, - } - self.set_voice(voice_id) + self._settings: NeuphonicTTSSettings = NeuphonicTTSSettings( + lang_code=self.language_to_service_language(params.language), + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + ) + self._voice_id = voice_id self._cumulative_time = 0 @@ -160,15 +179,14 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return language_to_neuphonic_lang_code(language) - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect with new configuration.""" - if "voice_id" in settings: - self.set_voice(settings["voice_id"]) - - await super()._update_settings(settings) - await self._disconnect() - await self._connect() - logger.info(f"Switching TTS to settings: [{self._settings}]") + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and reconnect with new configuration.""" + changed = await super()._update_settings_from_typed(update) + if changed: + await self._disconnect() + await self._connect() + logger.info(f"Switching TTS to settings: [{self._settings}]") + return changed async def 
start(self, frame: StartFrame): """Start the Neuphonic TTS service. @@ -266,7 +284,10 @@ class NeuphonicTTSService(InterruptibleTTSService): logger.debug("Connecting to Neuphonic") tts_config = { - **self._settings, + "lang_code": self._settings.lang_code, + "speed": self._settings.speed, + "encoding": self._settings.encoding, + "sampling_rate": self._settings.sampling_rate, "voice_id": self._voice_id, } @@ -275,7 +296,7 @@ class NeuphonicTTSService(InterruptibleTTSService): if value is not None: query_params.append(f"{key}={value}") - url = f"{self._url}/speak/{self._settings['lang_code']}" + url = f"{self._url}/speak/{self._settings.lang_code}" if query_params: url += f"?{'&'.join(query_params)}" @@ -429,7 +450,7 @@ class NeuphonicHttpTTSService(TTSService): self._lang_code = self.language_to_service_language(params.language) or "en" self._speed = params.speed self._encoding = encoding - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 8eb6d7bb5..c65d6da62 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -8,6 +8,7 @@ import asyncio from concurrent.futures import CancelledError as FuturesCancelledError +from dataclasses import dataclass, field from typing import AsyncGenerator, List, Mapping, Optional from loguru import logger @@ -22,6 +23,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import NVIDIA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, STTService from pipecat.transcriptions.language import Language, resolve_language @@ -89,6 +91,32 @@ def language_to_nvidia_riva_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class NvidiaSTTSettings(STTSettings): + """Typed settings for the NVIDIA Riva streaming STT service.""" + + pass + + +@dataclass +class NvidiaSegmentedSTTSettings(STTSettings): + """Typed settings for the NVIDIA Riva segmented STT service. + + Parameters: + profanity_filter: Whether to filter profanity from results. + automatic_punctuation: Whether to add automatic punctuation. + verbatim_transcripts: Whether to return verbatim transcripts. + boosted_lm_words: List of words to boost in language model. + boosted_lm_score: Score boost for specified words. + """ + + profanity_filter: bool = field(default_factory=lambda: NOT_GIVEN) + automatic_punctuation: bool = field(default_factory=lambda: NOT_GIVEN) + verbatim_transcripts: bool = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_words: Optional[List[str]] = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_score: float = field(default_factory=lambda: NOT_GIVEN) + + class NvidiaSTTService(STTService): """Real-time speech-to-text service using NVIDIA Riva streaming ASR. 
@@ -141,12 +169,6 @@ class NvidiaSTTService(STTService): self._server = server self._api_key = api_key self._use_ssl = use_ssl - self._profanity_filter = False - self._automatic_punctuation = True - self._no_verbatim_transcripts = False - self._language_code = params.language - self._boosted_lm_words = None - self._boosted_lm_score = 4.0 self._start_history = -1 self._start_threshold = -1.0 self._stop_history = -1 @@ -156,14 +178,9 @@ class NvidiaSTTService(STTService): self._custom_configuration = "" self._function_id = model_function_map.get("function_id") - self._settings = { - "language": str(params.language), - "profanity_filter": self._profanity_filter, - "automatic_punctuation": self._automatic_punctuation, - "verbatim_transcripts": not self._no_verbatim_transcripts, - "boosted_lm_words": self._boosted_lm_words, - "boosted_lm_score": self._boosted_lm_score, - } + self._settings: NvidiaSTTSettings = NvidiaSTTSettings( + language=params.language, + ) self.set_model_name(model_function_map.get("model_name")) @@ -186,22 +203,18 @@ class NvidiaSTTService(STTService): config = riva.client.StreamingRecognitionConfig( config=riva.client.RecognitionConfig( encoding=riva.client.AudioEncoding.LINEAR_PCM, - language_code=self._language_code, + language_code=self._settings.language, model="", max_alternatives=1, - profanity_filter=self._profanity_filter, - enable_automatic_punctuation=self._automatic_punctuation, - verbatim_transcripts=not self._no_verbatim_transcripts, + profanity_filter=False, + enable_automatic_punctuation=True, + verbatim_transcripts=True, sample_rate_hertz=self.sample_rate, audio_channel_count=1, ), interim_results=True, ) - riva.client.add_word_boosting_to_config( - config, self._boosted_lm_words, self._boosted_lm_score - ) - riva.client.add_endpoint_parameters_to_config( config, self._start_history, @@ -318,14 +331,14 @@ class NvidiaSTTService(STTService): transcript, self._user_id, time_now_iso8601(), - self._language_code, + 
self._settings.language, result=result, ) ) await self._handle_transcription( transcript=transcript, is_final=result.is_final, - language=self._language_code, + language=self._settings.language, ) else: await self.push_frame( @@ -333,7 +346,7 @@ class NvidiaSTTService(STTService): transcript, self._user_id, time_now_iso8601(), - self._language_code, + self._settings.language, result=result, ) ) @@ -445,18 +458,6 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._server = server self._use_ssl = use_ssl self._function_id = model_function_map.get("function_id") - self._model_name = model_function_map.get("model_name") - - # Store the language as a Language enum and as a string - self._language_enum = params.language or Language.EN_US - self._language = self.language_to_service_language(self._language_enum) or "en-US" - - # Configure transcription parameters - self._profanity_filter = params.profanity_filter - self._automatic_punctuation = params.automatic_punctuation - self._verbatim_transcripts = params.verbatim_transcripts - self._boosted_lm_words = params.boosted_lm_words - self._boosted_lm_score = params.boosted_lm_score # Voice activity detection thresholds (use NVIDIA Riva defaults) self._start_history = -1 @@ -467,10 +468,16 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._stop_threshold_eou = -1.0 self._custom_configuration = "" - # Create NVIDIA Riva client self._config = None self._asr_service = None - self._settings = {"language": self._language_enum} + self._settings: NvidiaSegmentedSTTSettings = NvidiaSegmentedSTTSettings( + language=params.language or Language.EN_US, + profanity_filter=params.profanity_filter, + automatic_punctuation=params.automatic_punctuation, + verbatim_transcripts=params.verbatim_transcripts, + boosted_lm_words=params.boosted_lm_words, + boosted_lm_score=params.boosted_lm_score, + ) def language_to_service_language(self, language: Language) -> Optional[str]: """Convert pipecat Language enum to NVIDIA Riva's 
language code. @@ -498,21 +505,25 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): auth = riva.client.Auth(None, self._use_ssl, self._server, metadata) self._asr_service = riva.client.ASRService(auth) + def _get_language_code(self) -> str: + """Resolve the current language enum to an NVIDIA Riva language code string.""" + return self.language_to_service_language(self._settings.language) or "en-US" + def _create_recognition_config(self): """Create the NVIDIA Riva ASR recognition configuration.""" # Create base configuration config = riva.client.RecognitionConfig( - language_code=self._language, # Now using the string, not a tuple + language_code=self._get_language_code(), max_alternatives=1, - profanity_filter=self._profanity_filter, - enable_automatic_punctuation=self._automatic_punctuation, - verbatim_transcripts=self._verbatim_transcripts, + profanity_filter=self._settings.profanity_filter, + enable_automatic_punctuation=self._settings.automatic_punctuation, + verbatim_transcripts=self._settings.verbatim_transcripts, ) # Add word boosting if specified - if self._boosted_lm_words: + if self._settings.boosted_lm_words: riva.client.add_word_boosting_to_config( - config, self._boosted_lm_words, self._boosted_lm_score + config, self._settings.boosted_lm_words, self._settings.boosted_lm_score ) # Add voice activity detection parameters @@ -567,20 +578,21 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = self._create_recognition_config() logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self.model_name}") - async def set_language(self, language: Language): - """Set the language for the STT service. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update and sync internal state. Args: - language: Target language for transcription. 
- """ - logger.info(f"Switching STT language to: [{language}]") - self._language_enum = language - self._language = self.language_to_service_language(language) or "en-US" - self._settings["language"] = language + update: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. - # Update configuration with new language - if self._config: - self._config.language_code = self._language + Returns: + Set of field names whose values actually changed. + """ + changed = await super()._update_settings_from_typed(update) + + if changed: + self._config = self._create_recognition_config() + + return changed @traced_stt async def _handle_transcription( @@ -633,11 +645,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): text, self._user_id, time_now_iso8601(), - self._language_enum, + self._settings.language, ) transcription_found = True - await self._handle_transcription(text, True, self._language_enum) + await self._handle_transcription(text, True, self._settings.language) if not transcription_found: logger.debug(f"{self}: No transcription results found in NVIDIA Riva response") diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 6bac54e3a..8a018d6aa 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -100,7 +100,7 @@ class NvidiaTTSService(TTSService): self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl self.set_model_name(model_function_map.get("model_name")) - self.set_voice(voice_id) + self._voice_id = voice_id self._service = None self._config = None diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 2cdde51ea..2ac53794c 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -10,7 +10,8 @@ import asyncio import base64 import json from contextlib import asynccontextmanager -from typing import Any, Dict, List, Mapping, Optional +from dataclasses import 
dataclass, field +from typing import Any, ClassVar, Dict, List, Mapping, Optional import httpx from loguru import logger @@ -32,7 +33,6 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, LLMMessagesFrame, LLMTextFrame, - LLMUpdateSettingsFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -42,9 +42,24 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN +from pipecat.services.settings import LLMSettings from pipecat.utils.tracing.service_decorators import traced_llm +@dataclass +class OpenAILLMSettings(LLMSettings): + """Typed settings for OpenAI-compatible LLM services. + + Parameters: + max_completion_tokens: Maximum completion tokens to generate. + service_tier: Service tier to use (e.g., "auto", "flex", "priority"). + """ + + max_completion_tokens: Any = field(default_factory=lambda: _NOT_GIVEN) + service_tier: Any = field(default_factory=lambda: _NOT_GIVEN) + + class BaseOpenAILLMService(LLMService): """Base class for all services that use the AsyncOpenAI client. 
@@ -120,17 +135,18 @@ class BaseOpenAILLMService(LLMService): params = params or BaseOpenAILLMService.InputParams() - self._settings = { - "frequency_penalty": params.frequency_penalty, - "presence_penalty": params.presence_penalty, - "seed": params.seed, - "temperature": params.temperature, - "top_p": params.top_p, - "max_tokens": params.max_tokens, - "max_completion_tokens": params.max_completion_tokens, - "service_tier": params.service_tier, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } + self._settings = OpenAILLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + presence_penalty=params.presence_penalty, + seed=params.seed, + temperature=params.temperature, + top_p=params.top_p, + max_tokens=params.max_tokens, + max_completion_tokens=params.max_completion_tokens, + service_tier=params.service_tier, + extra=params.extra if isinstance(params.extra, dict) else {}, + ) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout self.set_model_name(model) @@ -250,20 +266,20 @@ class BaseOpenAILLMService(LLMService): "model": self.model_name, "stream": True, "stream_options": {"include_usage": True}, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "seed": self._settings["seed"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], - "max_completion_tokens": self._settings["max_completion_tokens"], - "service_tier": self._settings["service_tier"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "seed": self._settings.seed, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, + "max_completion_tokens": self._settings.max_completion_tokens, + "service_tier": self._settings.service_tier, } # Messages, tools, tool_choice 
params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params async def run_inference( @@ -508,8 +524,6 @@ class BaseOpenAILLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = OpenAILLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index cf249408c..abd66963b 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -10,8 +10,8 @@ import base64 import io import json import time -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional from loguru import logger from PIL import Image @@ -59,6 +59,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -90,6 +91,17 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class OpenAIRealtimeLLMSettings(LLMSettings): + """Typed settings for OpenAI Realtime LLM services. + + Parameters: + session_properties: OpenAI Realtime session configuration. + """ + + session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + + class OpenAIRealtimeLLMService(LLMService): """OpenAI Realtime LLM service providing real-time audio and text communication. 
@@ -161,9 +173,9 @@ class OpenAIRealtimeLLMService(LLMService): self.base_url = full_url self.set_model_name(model) - # Initialize session_properties - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() + self._settings = OpenAIRealtimeLLMSettings( + model=model, + session_properties=session_properties or events.SessionProperties(), ) self._audio_input_paused = start_audio_paused self._video_input_paused = start_video_paused @@ -227,12 +239,12 @@ class OpenAIRealtimeLLMService(LLMService): def _is_modality_enabled(self, modality: str) -> bool: """Check if a specific modality is enabled, "text" or "audio".""" - modalities = self._session_properties.output_modalities or ["audio", "text"] + modalities = self._settings.session_properties.output_modalities or ["audio", "text"] return modality in modalities def _get_enabled_modalities(self) -> list[str]: """Get the list of enabled modalities.""" - modalities = self._session_properties.output_modalities or ["audio", "text"] + modalities = self._settings.session_properties.output_modalities or ["audio", "text"] # API only supports single modality responses: either ["text"] or ["audio"] if "audio" in modalities: return ["audio"] @@ -305,9 +317,9 @@ class OpenAIRealtimeLLMService(LLMService): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. turn_detection_disabled = ( - self._session_properties.audio - and self._session_properties.audio.input - and self._session_properties.audio.input.turn_detection is False + self._settings.session_properties.audio + and self._settings.session_properties.audio.input + and self._settings.session_properties.audio.input.turn_detection is False ) if turn_detection_disabled: await self.send_client_event(events.InputAudioBufferClearEvent()) @@ -327,9 +339,9 @@ class OpenAIRealtimeLLMService(LLMService): # None and False are different. Check for False. 
None means we're using OpenAI's # built-in turn detection defaults. turn_detection_disabled = ( - self._session_properties.audio - and self._session_properties.audio.input - and self._session_properties.audio.input.turn_detection is False + self._settings.session_properties.audio + and self._settings.session_properties.audio.input + and self._settings.session_properties.audio.input.turn_detection is False ) if turn_detection_disabled: await self.send_client_event(events.InputAudioBufferCommitEvent()) @@ -397,6 +409,16 @@ class OpenAIRealtimeLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Legacy dict path: frame.settings contains SessionProperties fields, + # not our Settings fields, so we construct SessionProperties directly. + # The new typed path (frame.update) falls through to super, which calls + # _update_settings_from_typed → our override handles the rest. + if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._update_settings() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -424,9 +446,6 @@ class OpenAIRealtimeLLMService(LLMService): await self._handle_bot_stopped_speaking() elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): await self._update_settings() @@ -513,8 +532,15 @@ class OpenAIRealtimeLLMService(LLMService): # treat a send-side error as fatal. 
await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) + async def _update_settings_from_typed(self, update): + """Apply a typed settings update, sending a session update if needed.""" + changed = await super()._update_settings_from_typed(update) + if "session_properties" in changed: + await self._update_settings() + return changed + async def _update_settings(self): - settings = self._session_properties + settings = self._settings.session_properties adapter: OpenAIRealtimeLLMAdapter = self.get_llm_adapter() if self._context: diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 4dd16be6e..12eada24e 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -16,6 +16,7 @@ Provides two STT services: import base64 import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Literal, Optional, Union from loguru import logger @@ -34,6 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription @@ -123,6 +125,17 @@ class OpenAISTTService(BaseWhisperSTTService): _OPENAI_SAMPLE_RATE = 24000 +@dataclass +class OpenAIRealtimeSTTSettings(STTSettings): + """Typed settings for the OpenAI Realtime STT service. + + Parameters: + prompt: Optional prompt text to guide transcription style. + """ + + prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + + class OpenAIRealtimeSTTService(WebsocketSTTService): """OpenAI Realtime Speech-to-Text service using WebSocket transcription sessions. 
@@ -213,12 +226,17 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self._base_url = base_url self.set_model_name(model) - self._language_code = self._language_to_code(language) if language else None self._prompt = prompt self._turn_detection = turn_detection self._noise_reduction = noise_reduction self._should_interrupt = should_interrupt + self._settings: OpenAIRealtimeSTTSettings = OpenAIRealtimeSTTSettings( + model=model, + language=language, + prompt=prompt, + ) + self._receive_task = None self._session_ready = False self._resampler = create_stream_resampler() @@ -248,19 +266,31 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the language for speech recognition. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update and send session update if needed. - If the session is already active, sends an updated configuration - to the server. + Keeps ``_language_code`` and ``_prompt`` in sync with typed settings + and sends a ``session.update`` to the server when the session is active. Args: - language: The language to use for speech recognition. + update: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta. + + Returns: + Set of field names whose values actually changed. """ - self._language_code = self._language_to_code(language) + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + if "prompt" in changed and isinstance(self._settings, OpenAIRealtimeSTTSettings): + self._prompt = self._settings.prompt + if self._session_ready: await self._send_session_update() + return changed + async def start(self, frame: StartFrame): """Start the service and establish WebSocket connection. 
@@ -407,8 +437,11 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """Send ``session.update`` to configure the transcription session.""" transcription: dict = {"model": self.model_name} - if self._language_code: - transcription["language"] = self._language_code + language_code = ( + self._language_to_code(self._settings.language) if self._settings.language else None + ) + if language_code: + transcription["language"] = language_code if self._prompt: transcription["prompt"] = self._prompt diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index f59f0b31b..ee1e34316 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -10,6 +10,7 @@ This module provides integration with OpenAI's text-to-speech API for generating high-quality synthetic speech from text input. """ +from dataclasses import dataclass, field from typing import AsyncGenerator, Dict, Literal, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -60,6 +62,19 @@ VALID_VOICES: Dict[str, ValidVoice] = { } +@dataclass +class OpenAITTSSettings(TTSSettings): + """Typed settings for OpenAI TTS service. + + Parameters: + instructions: Instructions to guide voice synthesis behavior. + speed: Voice speed control (0.25 to 4.0, default 1.0). + """ + + instructions: str = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + + class OpenAITTSService(TTSService): """OpenAI Text-to-Speech service that generates audio from text. 
@@ -118,7 +133,7 @@ class OpenAITTSService(TTSService): super().__init__(sample_rate=sample_rate, **kwargs) self.set_model_name(model) - self.set_voice(voice) + self._voice_id = voice self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) if instructions or speed: @@ -132,10 +147,12 @@ class OpenAITTSService(TTSService): stacklevel=2, ) - self._settings = { - "instructions": params.instructions if params else instructions, - "speed": params.speed if params else speed, - } + self._settings: OpenAITTSSettings = OpenAITTSSettings( + model=model, + voice=voice, + instructions=params.instructions if params else instructions, + speed=params.speed if params else speed, + ) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -145,15 +162,6 @@ class OpenAITTSService(TTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model to use. - - Args: - model: The model name to use for text-to-speech synthesis. - """ - logger.info(f"Switching TTS model to: [{model}]") - self.set_model_name(model) - async def start(self, frame: StartFrame): """Start the OpenAI TTS service. 
@@ -190,11 +198,11 @@ class OpenAITTSService(TTSService): "response_format": "pcm", } - if self._settings["instructions"]: - create_params["instructions"] = self._settings["instructions"] + if self._settings.instructions: + create_params["instructions"] = self._settings.instructions - if self._settings["speed"]: - create_params["speed"] = self._settings["speed"] + if self._settings.speed: + create_params["speed"] = self._settings.speed async with self._client.audio.speech.with_streaming_response.create( **create_params diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 1199d8556..d37b1434e 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -10,8 +10,8 @@ import base64 import json import time import warnings -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional from loguru import logger @@ -54,6 +54,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.openai.llm import OpenAIContextAggregatorPair +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -91,6 +92,17 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class OpenAIRealtimeBetaLLMSettings(LLMSettings): + """Typed settings for OpenAI Realtime Beta LLM services. + + Parameters: + session_properties: OpenAI Realtime session configuration. 
+ """ + + session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + + class OpenAIRealtimeBetaLLMService(LLMService): """OpenAI Realtime Beta LLM service providing real-time audio and text communication. @@ -146,8 +158,9 @@ class OpenAIRealtimeBetaLLMService(LLMService): self.base_url = full_url self.set_model_name(model) - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() + self._settings = OpenAIRealtimeBetaLLMSettings( + model=model, + session_properties=session_properties or events.SessionProperties(), ) self._audio_input_paused = start_audio_paused self._send_transcription_frames = send_transcription_frames @@ -187,12 +200,12 @@ class OpenAIRealtimeBetaLLMService(LLMService): def _is_modality_enabled(self, modality: str) -> bool: """Check if a specific modality is enabled, "text" or "audio".""" - modalities = self._session_properties.modalities or ["audio", "text"] + modalities = self._settings.session_properties.modalities or ["audio", "text"] return modality in modalities def _get_enabled_modalities(self) -> list[str]: """Get the list of enabled modalities.""" - return self._session_properties.modalities or ["audio", "text"] + return self._settings.session_properties.modalities or ["audio", "text"] async def retrieve_conversation_item(self, item_id: str): """Retrieve a conversation item by ID from the server. @@ -259,7 +272,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_interruption(self): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. 
- if self._session_properties.turn_detection is False: + if self._settings.session_properties.turn_detection is False: await self.send_client_event(events.InputAudioBufferClearEvent()) await self.send_client_event(events.ResponseCancelEvent()) await self._truncate_current_audio_response() @@ -276,7 +289,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_user_stopped_speaking(self, frame): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. - if self._session_properties.turn_detection is False: + if self._settings.session_properties.turn_detection is False: await self.send_client_event(events.InputAudioBufferCommitEvent()) await self.send_client_event(events.ResponseCreateEvent()) @@ -342,6 +355,16 @@ class OpenAIRealtimeBetaLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Legacy dict path: frame.settings contains SessionProperties fields, + # not our Settings fields, so we construct SessionProperties directly. + # The new typed path (frame.update) falls through to super, which calls + # _update_settings_from_typed → our override handles the rest. 
+ if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._update_settings() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -377,9 +400,6 @@ class OpenAIRealtimeBetaLLMService(LLMService): await self._handle_messages_append(frame) elif isinstance(frame, RealtimeMessagesUpdateFrame): self._context = frame.context - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): await self._update_settings() elif isinstance(frame, RealtimeFunctionCallResultFrame): @@ -456,8 +476,15 @@ class OpenAIRealtimeBetaLLMService(LLMService): # treat a send-side error as fatal. await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) + async def _update_settings_from_typed(self, update): + """Apply a typed settings update, sending a session update if needed.""" + changed = await super()._update_settings_from_typed(update) + if "session_properties" in changed: + await self._update_settings() + return changed + async def _update_settings(self): - settings = self._session_properties + settings = self._settings.session_properties # tools given in the context override the tools in the session properties if self._context and self._context.tools: settings.tools = self._context.tools diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index 4ea23aa82..d2dd40a57 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -72,16 +72,16 @@ class PerplexityLLMService(OpenAILLMService): } # Add OpenAI-compatible parameters if they're set - if self._settings["frequency_penalty"] is not NOT_GIVEN: - params["frequency_penalty"] = 
self._settings["frequency_penalty"] - if self._settings["presence_penalty"] is not NOT_GIVEN: - params["presence_penalty"] = self._settings["presence_penalty"] - if self._settings["temperature"] is not NOT_GIVEN: - params["temperature"] = self._settings["temperature"] - if self._settings["top_p"] is not NOT_GIVEN: - params["top_p"] = self._settings["top_p"] - if self._settings["max_tokens"] is not NOT_GIVEN: - params["max_tokens"] = self._settings["max_tokens"] + if self._settings.frequency_penalty is not NOT_GIVEN: + params["frequency_penalty"] = self._settings.frequency_penalty + if self._settings.presence_penalty is not NOT_GIVEN: + params["presence_penalty"] = self._settings.presence_penalty + if self._settings.temperature is not NOT_GIVEN: + params["temperature"] = self._settings.temperature + if self._settings.top_p is not NOT_GIVEN: + params["top_p"] = self._settings.top_p + if self._settings.max_tokens is not NOT_GIVEN: + params["max_tokens"] = self._settings.max_tokens return params diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index 2d4cd0427..f79f54560 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -14,6 +14,7 @@ import io import json import struct import warnings +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -32,6 +33,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -97,6 +99,25 @@ def language_to_playht_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class 
PlayHTTTSSettings(TTSSettings): + """Typed settings for PlayHT TTS services. + + Parameters: + output_format: Audio output format. + voice_engine: Voice engine to use. + speed: Speech speed multiplier. Defaults to 1.0. + seed: Random seed for voice consistency. + playht_sample_rate: Audio sample rate sent to the API. + """ + + output_format: str = field(default_factory=lambda: NOT_GIVEN) + voice_engine: str = field(default_factory=lambda: NOT_GIVEN) + speed: float = field(default_factory=lambda: NOT_GIVEN) + seed: int = field(default_factory=lambda: NOT_GIVEN) + playht_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + + class PlayHTTTSService(InterruptibleTTSService): """PlayHT WebSocket-based text-to-speech service. @@ -170,17 +191,19 @@ class PlayHTTTSService(InterruptibleTTSService): self._receive_task = None self._context_id = None - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: PlayHTTTSSettings = PlayHTTTSSettings( + model=voice_engine, + voice=voice_url, + language=self.language_to_service_language(params.language) if params.language else "english", - "output_format": output_format, - "voice_engine": voice_engine, - "speed": params.speed, - "seed": params.seed, - } + output_format=output_format, + voice_engine=voice_engine, + speed=params.speed, + seed=params.seed, + ) self.set_model_name(voice_engine) - self.set_voice(voice_url) + self._voice_id = voice_url def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -304,13 +327,13 @@ class PlayHTTTSService(InterruptibleTTSService): # Handle the new response format with multiple URLs if "websocket_urls" in data: # Select URL based on voice_engine - if self._settings["voice_engine"] in data["websocket_urls"]: + if self._settings.voice_engine in data["websocket_urls"]: self._websocket_url = data["websocket_urls"][ - self._settings["voice_engine"] + self._settings.voice_engine ] else: raise ValueError( - f"Unsupported voice engine: {self._settings['voice_engine']}" + f"Unsupported voice engine: {self._settings.voice_engine}" ) else: raise ValueError("Invalid response: missing websocket_urls") @@ -382,12 +405,12 @@ class PlayHTTTSService(InterruptibleTTSService): tts_command = { "text": text, "voice": self._voice_id, - "voice_engine": self._settings["voice_engine"], - "output_format": self._settings["output_format"], + "voice_engine": self._settings.voice_engine, + "output_format": self._settings.output_format, "sample_rate": self.sample_rate, - "language": self._settings["language"], - "speed": self._settings["speed"], - "seed": self._settings["seed"], + "language": self._settings.language, + "speed": self._settings.speed, + "seed": self._settings.seed, "request_id": self._context_id, } @@ -499,17 +522,18 @@ class PlayHTHttpTTSService(TTSService): # Extract the base engine name voice_engine = voice_engine.replace("-ws", "") - self._settings = { - "language": self.language_to_service_language(params.language) + self._settings: PlayHTTTSSettings = PlayHTTTSSettings( + voice=voice_url, + language=self.language_to_service_language(params.language) if params.language else "english", - "output_format": output_format, - "voice_engine": voice_engine, - "speed": params.speed, - "seed": params.seed, - } + output_format=output_format, + voice_engine=voice_engine, + speed=params.speed, + seed=params.seed, + ) self.set_model_name(voice_engine) - self.set_voice(voice_url) + self._voice_id = voice_url async def start(self, frame: 
StartFrame): """Start the PlayHT HTTP TTS service. @@ -518,7 +542,7 @@ class PlayHTHttpTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.playht_sample_rate = self.sample_rate def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -559,17 +583,17 @@ class PlayHTHttpTTSService(TTSService): payload = { "text": text, "voice": self._voice_id, - "voice_engine": self._settings["voice_engine"], - "output_format": self._settings["output_format"], + "voice_engine": self._settings.voice_engine, + "output_format": self._settings.output_format, "sample_rate": self.sample_rate, - "language": self._settings["language"], + "language": self._settings.language, } # Add optional parameters if they exist - if self._settings["speed"] is not None: - payload["speed"] = self._settings["speed"] - if self._settings["seed"] is not None: - payload["seed"] = self._settings["seed"] + if self._settings.speed is not None: + payload["speed"] = self._settings.speed + if self._settings.seed is not None: + payload["seed"] = self._settings.seed headers = { "Authorization": f"Bearer {self._api_key}", diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 964b9fa18..08f9b81bd 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -8,6 +8,7 @@ import base64 import json +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import AudioContextWordTTSService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator 
import BaseTextAggregator @@ -38,6 +40,21 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class ResembleAITTSSettings(TTSSettings): + """Typed settings for Resemble AI TTS service. + + Parameters: + precision: PCM bit depth (PCM_32, PCM_24, PCM_16, or MULAW). + output_format: Audio format (wav or mp3). + resemble_sample_rate: Audio sample rate sent to the API. + """ + + precision: str = field(default_factory=lambda: NOT_GIVEN) + output_format: str = field(default_factory=lambda: NOT_GIVEN) + resemble_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + + class ResembleAITTSService(AudioContextWordTTSService): """Resemble AI TTS service with WebSocket streaming and word timestamps. @@ -76,11 +93,12 @@ class ResembleAITTSService(AudioContextWordTTSService): self._api_key = api_key self._voice_id = voice_id self._url = url - self._settings = { - "precision": precision, - "output_format": output_format, - "sample_rate": sample_rate, - } + self._settings: ResembleAITTSSettings = ResembleAITTSSettings( + voice=voice_id, + precision=precision, + output_format=output_format, + resemble_sample_rate=sample_rate, + ) self._websocket = None self._request_id_counter = 0 @@ -101,7 +119,7 @@ class ResembleAITTSService(AudioContextWordTTSService): self._jitter_buffer_bytes = 44100 # ~1000ms at 22050Hz to handle 400ms+ network gaps self._playback_started: dict[str, bool] = {} # Track if we've started playback per request - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -125,9 +143,9 @@ class ResembleAITTSService(AudioContextWordTTSService): "data": text, "binary_response": False, # Use JSON frames to get timestamps "request_id": self._request_id_counter, # ResembleAI only accepts number - "output_format": self._settings["output_format"], - "sample_rate": self._settings["sample_rate"], - "precision": self._settings["precision"], + "output_format": self._settings.output_format, + "sample_rate": self._settings.resemble_sample_rate, + "precision": self._settings.precision, "no_audio_header": True, } @@ -141,7 +159,7 @@ class ResembleAITTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.resemble_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index e38e840e6..5a3ed67a2 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -12,7 +12,8 @@ using Rime's API for streaming and batch audio synthesis. import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Optional import aiohttp from loguru import logger @@ -30,6 +31,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import ( AudioContextWordTTSService, InterruptibleTTSService, @@ -68,6 +70,62 @@ def language_to_rime_language(language: Language) -> str: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class RimeTTSSettings(TTSSettings): + """Typed settings for Rime WS JSON and HTTP TTS services. + + Parameters: + speaker: Voice speaker ID. + modelId: Rime model identifier. 
+ audioFormat: Audio output format. + samplingRate: Audio sample rate. + lang: Rime language code. + speedAlpha: Speech speed multiplier. Defaults to 1.0. + reduceLatency: Whether to reduce latency at potential quality cost. + pauseBetweenBrackets: Whether to add pauses between bracketed content. + phonemizeBetweenBrackets: Whether to phonemize bracketed content. + inlineSpeedAlpha: Inline speed control markup. + """ + + speaker: str = field(default_factory=lambda: NOT_GIVEN) + modelId: str = field(default_factory=lambda: NOT_GIVEN) + audioFormat: str = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int = field(default_factory=lambda: NOT_GIVEN) + lang: str = field(default_factory=lambda: NOT_GIVEN) + speedAlpha: float = field(default_factory=lambda: NOT_GIVEN) + reduceLatency: bool = field(default_factory=lambda: NOT_GIVEN) + pauseBetweenBrackets: bool = field(default_factory=lambda: NOT_GIVEN) + phonemizeBetweenBrackets: bool = field(default_factory=lambda: NOT_GIVEN) + inlineSpeedAlpha: str = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class RimeNonJsonTTSSettings(TTSSettings): + """Typed settings for Rime non-JSON WS TTS service. + + Parameters: + speaker: Voice speaker ID. + modelId: Rime model identifier. + audioFormat: Audio output format. + samplingRate: Audio sample rate. + lang: Rime language code. + segment: Text segmentation mode ("immediate", "bySentence", "never"). + repetition_penalty: Token repetition penalty (1.0-2.0). + temperature: Sampling temperature (0.0-1.0). + top_p: Cumulative probability threshold (0.0-1.0). 
+ """ + + speaker: str = field(default_factory=lambda: NOT_GIVEN) + modelId: str = field(default_factory=lambda: NOT_GIVEN) + audioFormat: str = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int = field(default_factory=lambda: NOT_GIVEN) + lang: str = field(default_factory=lambda: NOT_GIVEN) + segment: str = field(default_factory=lambda: NOT_GIVEN) + repetition_penalty: float = field(default_factory=lambda: NOT_GIVEN) + temperature: float = field(default_factory=lambda: NOT_GIVEN) + top_p: float = field(default_factory=lambda: NOT_GIVEN) + + class RimeTTSService(AudioContextWordTTSService): """Text-to-Speech service using Rime's websocket API. @@ -149,19 +207,17 @@ class RimeTTSService(AudioContextWordTTSService): self._url = url self._voice_id = voice_id self._model = model - self._settings = { - "speaker": voice_id, - "modelId": model, - "audioFormat": "pcm", - "samplingRate": 0, - "lang": self.language_to_service_language(params.language) - if params.language - else "eng", - "speedAlpha": params.speed_alpha, - "reduceLatency": params.reduce_latency, - "pauseBetweenBrackets": json.dumps(params.pause_between_brackets), - "phonemizeBetweenBrackets": json.dumps(params.phonemize_between_brackets), - } + self._settings: RimeTTSSettings = RimeTTSSettings( + speaker=voice_id, + modelId=model, + audioFormat="pcm", + samplingRate=0, + lang=self.language_to_service_language(params.language) if params.language else "eng", + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=json.dumps(params.pause_between_brackets), + phonemizeBetweenBrackets=json.dumps(params.phonemize_between_brackets), + ) # State tracking self._context_id = None # Tracks current turn @@ -188,15 +244,6 @@ class RimeTTSService(AudioContextWordTTSService): """ return language_to_rime_language(language) - async def set_model(self, model: str): - """Update the TTS model. - - Args: - model: The model name to use for synthesis. 
- """ - self._model = model - await super().set_model(model) - # A set of Rime-specific helpers for text transformations def SPELL(text: str) -> str: """Wrap text in Rime spell function.""" @@ -222,15 +269,15 @@ class RimeTTSService(AudioContextWordTTSService): self._extra_msg_fields["inlineSpeedAlpha"] = ",".join(speed_vals + [str(speed)]) return f"[{text}]" - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice changed.""" + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and reconnect if voice changed.""" prev_voice = self._voice_id - await super()._update_settings(settings) - if not prev_voice == self._voice_id: - self._settings["speaker"] = self._voice_id - logger.info(f"Switching TTS voice to: [{self._voice_id}]") + changed = await super()._update_settings_from_typed(update) + if "voice" in changed: + self._settings.speaker = self._voice_id await self._disconnect() await self._connect() + return changed def _build_msg(self, text: str = "") -> dict: """Build JSON message for Rime API.""" @@ -255,7 +302,7 @@ class RimeTTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["samplingRate"] = self.sample_rate + self._settings.samplingRate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -301,7 +348,20 @@ class RimeTTSService(AudioContextWordTTSService): if self._websocket and self._websocket.state is State.OPEN: return - params = "&".join(f"{k}={v}" for k, v in self._settings.items()) + params = "&".join( + f"{k}={v}" + for k, v in { + "speaker": self._settings.speaker, + "modelId": self._settings.modelId, + "audioFormat": self._settings.audioFormat, + "samplingRate": self._settings.samplingRate, + "lang": self._settings.lang, + "speedAlpha": self._settings.speedAlpha, + "reduceLatency": self._settings.reduceLatency, + "pauseBetweenBrackets": self._settings.pauseBetweenBrackets, + "phonemizeBetweenBrackets": self._settings.phonemizeBetweenBrackets, + }.items() + ) url = f"{self._url}?{params}" headers = {"Authorization": f"Bearer {self._api_key}"} self._websocket = await websocket_connect(url, additional_headers=headers) @@ -525,21 +585,17 @@ class RimeHttpTTSService(TTSService): self._api_key = api_key self._session = aiohttp_session self._base_url = "https://users.rime.ai/v1/rime-tts" - self._settings = { - "lang": self.language_to_service_language(params.language) - if params.language - else "eng", - "speedAlpha": params.speed_alpha, - "reduceLatency": params.reduce_latency, - "pauseBetweenBrackets": params.pause_between_brackets, - "phonemizeBetweenBrackets": params.phonemize_between_brackets, - } - self.set_voice(voice_id) + self._settings: RimeTTSSettings = RimeTTSSettings( + lang=self.language_to_service_language(params.language) if params.language else "eng", + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else NOT_GIVEN, + ) + 
self._voice_id = voice_id self.set_model_name(model) - if params.inline_speed_alpha: - self._settings["inlineSpeedAlpha"] = params.inline_speed_alpha - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -578,7 +634,15 @@ class RimeHttpTTSService(TTSService): "Content-Type": "application/json", } - payload = self._settings.copy() + payload = { + "lang": self._settings.lang, + "speedAlpha": self._settings.speedAlpha, + "reduceLatency": self._settings.reduceLatency, + "pauseBetweenBrackets": self._settings.pauseBetweenBrackets, + "phonemizeBetweenBrackets": self._settings.phonemizeBetweenBrackets, + } + if is_given(self._settings.inlineSpeedAlpha): + payload["inlineSpeedAlpha"] = self._settings.inlineSpeedAlpha payload["text"] = text payload["speaker"] = self._voice_id payload["modelId"] = self._model_name @@ -699,26 +763,24 @@ class RimeNonJsonTTSService(InterruptibleTTSService): self._url = url self._voice_id = voice_id self._model = model - self._settings = { - "speaker": voice_id, - "modelId": model, - "audioFormat": audio_format, - "samplingRate": sample_rate, - } - - if params.language: - self._settings["lang"] = self.language_to_service_language(params.language) - if params.segment is not None: - self._settings["segment"] = params.segment - if params.repetition_penalty is not None: - self._settings["repetition_penalty"] = params.repetition_penalty - if params.temperature is not None: - self._settings["temperature"] = params.temperature - if params.top_p is not None: - self._settings["top_p"] = params.top_p + self._settings: RimeNonJsonTTSSettings = RimeNonJsonTTSSettings( + speaker=voice_id, + modelId=model, + audioFormat=audio_format, + samplingRate=sample_rate, + lang=self.language_to_service_language(params.language) + if params.language + else NOT_GIVEN, + segment=params.segment if params.segment is not None else NOT_GIVEN, + repetition_penalty=params.repetition_penalty + if params.repetition_penalty is not 
None + else NOT_GIVEN, + temperature=params.temperature if params.temperature is not None else NOT_GIVEN, + top_p=params.top_p if params.top_p is not None else NOT_GIVEN, + ) # Add any extra parameters for future compatibility if params.extra: - self._settings.update(params.extra) + self._settings.extra.update(params.extra) self._receive_task = None self._context_id: Optional[str] = None @@ -750,7 +812,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["samplingRate"] = self.sample_rate + self._settings.samplingRate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -794,8 +856,26 @@ class RimeNonJsonTTSService(InterruptibleTTSService): try: if self._websocket and self._websocket.state is State.OPEN: return - # Build URL with query parameters (only non-None values) - params = "&".join(f"{k}={v}" for k, v in self._settings.items() if v is not None) + # Build URL with query parameters (only given, non-None values) + settings_dict = { + "speaker": self._settings.speaker, + "modelId": self._settings.modelId, + "audioFormat": self._settings.audioFormat, + "samplingRate": self._settings.samplingRate, + } + if is_given(self._settings.lang): + settings_dict["lang"] = self._settings.lang + if is_given(self._settings.segment): + settings_dict["segment"] = self._settings.segment + if is_given(self._settings.repetition_penalty): + settings_dict["repetition_penalty"] = self._settings.repetition_penalty + if is_given(self._settings.temperature): + settings_dict["temperature"] = self._settings.temperature + if is_given(self._settings.top_p): + settings_dict["top_p"] = self._settings.top_p + # Include extras + settings_dict.update(self._settings.extra) + params = "&".join(f"{k}={v}" for k, v in settings_dict.items() if v is not None) url = f"{self._url}?{params}" headers = {"Authorization": f"Bearer {self._api_key}"} self._websocket = 
await websocket_connect( @@ -889,68 +969,23 @@ class RimeNonJsonTTSService(InterruptibleTTSService): except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if necessary. + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. """ - needs_reconnect = False + changed = await super()._update_settings_from_typed(update) - # Track previous values from self._settings only - prev_settings = self._settings.copy() + # Sync voice and model to settings dict fields + if "voice" in changed: + self._settings.speaker = self._voice_id + if "model" in changed: + self._settings.modelId = self._model_name - # Let parent class handle standard settings (voice, model, language) - await super()._update_settings(settings) - - # Check if voice changed and update settings dict - if "voice" in settings or "voice_id" in settings: - self._settings["speaker"] = self._voice_id - if prev_settings.get("speaker") != self._voice_id: - logger.info(f"Switching TTS voice to: [{self._voice_id}]") - needs_reconnect = True - - # Check if model changed and update settings dict - if "model" in settings: - self._settings["modelId"] = self._model - if prev_settings.get("modelId") != self._model: - logger.info(f"Switching TTS model to: [{self._model}]") - needs_reconnect = True - - # Handle language explicitly - if "language" in settings: - new_lang = self.language_to_service_language(settings["language"]) - if new_lang and new_lang != prev_settings.get("lang"): - logger.info(f"Updating language to: [{new_lang}]") - self._settings["lang"] = new_lang - needs_reconnect = True - - # Check other parameters - for key in ["segment", "repetition_penalty", "temperature", "top_p"]: - if 
key in settings and settings[key] != prev_settings.get(key): - logger.info(f"Updating {key} to: [{settings[key]}]") - self._settings[key] = settings[key] - needs_reconnect = True - - # Handle extra parameters - for key, value in settings.items(): - if key not in [ - "voice", - "voice_id", - "model", - "language", - "segment", - "repetition_penalty", - "temperature", - "top_p", - ]: - if value != prev_settings.get(key): - logger.info(f"Updating extra parameter {key} to: [{value}]") - self._settings[key] = value - needs_reconnect = True - - # Reconnect if any setting changed - if needs_reconnect: + if changed: logger.debug("Settings changed, reconnecting WebSocket with new parameters") await self._disconnect() await self._connect() + + return changed diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 047ce0e6c..99c7bca2c 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -87,16 +87,16 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore "model": self.model_name, "stream": True, "stream_options": {"include_usage": True}, - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], - "max_completion_tokens": self._settings["max_completion_tokens"], + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, + "max_completion_tokens": self._settings.max_completion_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params @traced_llm # type: ignore diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 998597956..e2bc6a08f 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -12,7 +12,7 @@ can handle multiple audio formats for Indian language speech 
recognition. """ import base64 -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import AsyncGenerator, Dict, Literal, Optional from loguru import logger @@ -32,6 +32,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import SARVAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -130,6 +131,23 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = { } +@dataclass +class SarvamSTTSettings(STTSettings): + """Typed settings for the Sarvam STT service. + + Parameters: + prompt: Optional prompt to guide transcription/translation style. + mode: Mode of operation (transcribe, translate, verbatim, etc.). + vad_signals: Enable VAD signals in response. + high_vad_sensitivity: Enable high VAD sensitivity. + """ + + prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + mode: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + vad_signals: Optional[bool] = field(default_factory=lambda: NOT_GIVEN) + high_vad_sensitivity: Optional[bool] = field(default_factory=lambda: NOT_GIVEN) + + class SarvamSTTService(STTService): """Sarvam speech-to-text service. 
@@ -207,22 +225,8 @@ class SarvamSTTService(STTService): self.set_model_name(model) self._api_key = api_key - self._language_code: Optional[Language] = params.language - - # Set language string: use provided language or model's default - if params.language: - self._language_string = language_to_sarvam_language(params.language) - else: - self._language_string = self._config.default_language - - self._prompt = params.prompt - - # Set mode: use provided mode or model's default - self._mode = params.mode if params.mode is not None else self._config.default_mode # Store connection parameters - self._vad_signals = params.vad_signals - self._high_vad_sensitivity = params.high_vad_sensitivity self._input_audio_codec = input_audio_codec # Initialize Sarvam SDK client @@ -240,7 +244,19 @@ class SarvamSTTService(STTService): self._socket_client = None self._receive_task = None - if self._vad_signals: + # Resolve mode default from model config + mode = params.mode if params.mode is not None else self._config.default_mode + + self._settings: SarvamSTTSettings = SarvamSTTSettings( + model=model, + language=params.language, + prompt=params.prompt if params.prompt is not None else NOT_GIVEN, + mode=mode if mode is not None else NOT_GIVEN, + vad_signals=params.vad_signals, + high_vad_sensitivity=params.high_vad_sensitivity, + ) + + if params.vad_signals: self._register_event_handler("on_speech_started") self._register_event_handler("on_speech_stopped") self._register_event_handler("on_utterance_end") @@ -258,6 +274,12 @@ class SarvamSTTService(STTService): """ return language_to_sarvam_language(language) + def _get_language_string(self) -> Optional[str]: + """Resolve the current language setting to a Sarvam language code string.""" + if self._settings.language: + return language_to_sarvam_language(self._settings.language) + return self._config.default_language + def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -275,42 +297,74 @@ class SarvamSTTService(STTService): await super().process_frame(frame, direction) # Only handle VAD frames when not using Sarvam's VAD signals - if not self._vad_signals: + if not self._settings.vad_signals: if isinstance(frame, VADUserStartedSpeakingFrame): await self._start_metrics() elif isinstance(frame, VADUserStoppedSpeakingFrame): if self._socket_client: await self._socket_client.flush() - async def set_language(self, language: Language): - """Set the recognition language and reconnect. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, validate, sync state, and reconnect. Args: - language: The language to use for speech recognition. + update: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. + + Returns: + Set of field names whose values actually changed. Raises: - ValueError: If called on a model that auto-detects language. + ValueError: If a setting is not supported by the current model. """ - if not self._config.supports_language: - raise ValueError( - f"Model '{self.model_name}' does not support language parameter " - "(auto-detects language)." - ) + # Validate against model capabilities before applying + if is_given(update.language) and update.language is not None: + if not self._config.supports_language: + raise ValueError( + f"Model '{self.model_name}' does not support language parameter " + "(auto-detects language)." + ) + + if isinstance(update, SarvamSTTSettings): + if is_given(update.prompt) and update.prompt is not None: + if not self._config.supports_prompt: + raise ValueError( + f"Model '{self.model_name}' does not support prompt parameter." 
+ ) + if is_given(update.mode) and update.mode is not None: + if not self._config.supports_mode: + raise ValueError(f"Model '{self.model_name}' does not support mode parameter.") + + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed - logger.info(f"Switching STT language to: [{language}]") - self._language_code = language - self._language_string = language_to_sarvam_language(language) await self._disconnect() await self._connect() + return changed async def set_prompt(self, prompt: Optional[str]): """Set the transcription/translation prompt and reconnect. + .. deprecated:: + Use ``STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...))`` instead. + Args: prompt: Prompt text to guide transcription/translation style/context. Pass None to clear/disable prompt. Only applicable to models that support prompts. """ + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + f"{self.__class__.__name__}.set_prompt() is deprecated. " + "Use STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...)) instead.", + DeprecationWarning, + stacklevel=2, + ) + if not self._config.supports_prompt: if prompt is not None: raise ValueError(f"Model '{self.model_name}' does not support prompt parameter.") @@ -318,7 +372,7 @@ class SarvamSTTService(STTService): return logger.info(f"Updating {self.model_name} prompt.") - self._prompt = prompt + self._settings.prompt = prompt await self._disconnect() await self._connect() @@ -405,24 +459,25 @@ class SarvamSTTService(STTService): # Enable flush signal when using Pipecat's VAD (not Sarvam's) so that # the flush() call on user-stopped-speaking is honored by the server. 
- if not self._vad_signals: + if not self._settings.vad_signals: connect_kwargs["flush_signal"] = "true" # Only send vad parameters when explicitly set (avoid overriding server defaults) - if self._vad_signals is not None: - connect_kwargs["vad_signals"] = "true" if self._vad_signals else "false" - if self._high_vad_sensitivity is not None: + if self._settings.vad_signals is not None: + connect_kwargs["vad_signals"] = "true" if self._settings.vad_signals else "false" + if self._settings.high_vad_sensitivity is not None: connect_kwargs["high_vad_sensitivity"] = ( - "true" if self._high_vad_sensitivity else "false" + "true" if self._settings.high_vad_sensitivity else "false" ) # Add language_code for models that support it - if self._language_string is not None: - connect_kwargs["language_code"] = self._language_string + language_string = self._get_language_string() + if language_string is not None: + connect_kwargs["language_code"] = language_string # Add mode for models that support it - if self._config.supports_mode and self._mode is not None: - connect_kwargs["mode"] = self._mode + if self._config.supports_mode and is_given(self._settings.mode): + connect_kwargs["mode"] = self._settings.mode def _connect_with_sdk_headers(connect_fn, **kwargs): # Different SDK versions may use different kwarg names. @@ -449,8 +504,8 @@ class SarvamSTTService(STTService): self._socket_client = await self._websocket_context.__aenter__() # Set prompt if provided (only for models that support prompts) - if self._prompt is not None and self._config.supports_prompt: - await self._socket_client.set_prompt(self._prompt) + if is_given(self._settings.prompt) and self._config.supports_prompt: + await self._socket_client.set_prompt(self._settings.prompt) # Register event handler for incoming messages def _message_handler(message): @@ -544,10 +599,12 @@ class SarvamSTTService(STTService): # Prefer language from message (auto-detected for translate models). Fallback to configured. 
if language_code: language = self._map_language_code_to_enum(language_code) - elif self._language_string: - language = self._map_language_code_to_enum(self._language_string) else: - language = Language.HI_IN + language_string = self._get_language_string() + if language_string: + language = self._map_language_code_to_enum(language_string) + else: + language = Language.HI_IN # Emit utterance end event await self._call_event_handler("on_utterance_end") diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 753293c75..e28914b4c 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -40,9 +40,9 @@ See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API import asyncio import base64 import json -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional, Tuple +from typing import AsyncGenerator, Dict, List, Optional, Tuple import aiohttp from loguru import logger @@ -62,6 +62,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers +from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -244,6 +245,80 @@ def language_to_sarvam_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class SarvamHttpTTSSettings(TTSSettings): + """Typed settings for Sarvam HTTP TTS service. + + Parameters: + language: Sarvam language code. + enable_preprocessing: Whether to enable text preprocessing. Defaults to False. + **Note:** Always enabled for bulbul:v3-beta (cannot be disabled). 
+ pace: Speech pace multiplier. Defaults to 1.0. + - bulbul:v2: Range 0.3 to 3.0 + - bulbul:v3-beta: Range 0.5 to 2.0 + pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0). + Lower values = more deterministic, higher = more random. Defaults to 0.6. + **Note:** Only supported for bulbul:v3-beta. Ignored for v2. + sarvam_sample_rate: Audio sample rate. + """ + + language: str = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool = field(default_factory=lambda: NOT_GIVEN) + pace: float = field(default_factory=lambda: NOT_GIVEN) + pitch: float = field(default_factory=lambda: NOT_GIVEN) + loudness: float = field(default_factory=lambda: NOT_GIVEN) + temperature: float = field(default_factory=lambda: NOT_GIVEN) + sarvam_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class SarvamWSTTSSettings(TTSSettings): + """Typed settings for Sarvam WebSocket TTS service. + + Parameters: + target_language_code: Sarvam language code. + speaker: Voice speaker ID. + speech_sample_rate: Audio sample rate as string. + enable_preprocessing: Enable text preprocessing. Defaults to False. + **Note:** Always enabled for bulbul:v3-beta. + min_buffer_size: Minimum characters to buffer before generating audio. + Lower values reduce latency but may affect quality. Defaults to 50. + max_chunk_length: Maximum characters processed in a single chunk. + Controls memory usage and processing efficiency. Defaults to 150. + output_audio_codec: Audio codec format. Options: linear16, mulaw, alaw, + opus, flac, aac, wav, mp3. Defaults to "linear16". + output_audio_bitrate: Audio bitrate (32k, 64k, 96k, 128k, 192k). + Defaults to "128k". + pace: Speech pace multiplier. Defaults to 1.0. 
+ - bulbul:v2: Range 0.3 to 3.0 + - bulbul:v3-beta: Range 0.5 to 2.0 + pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0). + Lower = more deterministic, higher = more random. Defaults to 0.6. + **Note:** Only supported for bulbul:v3-beta. Ignored for v2. + """ + + target_language_code: str = field(default_factory=lambda: NOT_GIVEN) + speaker: str = field(default_factory=lambda: NOT_GIVEN) + speech_sample_rate: str = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool = field(default_factory=lambda: NOT_GIVEN) + min_buffer_size: int = field(default_factory=lambda: NOT_GIVEN) + max_chunk_length: int = field(default_factory=lambda: NOT_GIVEN) + output_audio_codec: str = field(default_factory=lambda: NOT_GIVEN) + output_audio_bitrate: str = field(default_factory=lambda: NOT_GIVEN) + pace: float = field(default_factory=lambda: NOT_GIVEN) + pitch: float = field(default_factory=lambda: NOT_GIVEN) + loudness: float = field(default_factory=lambda: NOT_GIVEN) + temperature: float = field(default_factory=lambda: NOT_GIVEN) + + class SarvamHttpTTSService(TTSService): """Text-to-Speech service using Sarvam AI's API. 
@@ -403,35 +478,35 @@ class SarvamHttpTTSService(TTSService): pace = max(pace_min, min(pace_max, pace)) # Build base settings - self._settings = { - "language": ( + self._settings: SarvamHttpTTSSettings = SarvamHttpTTSSettings( + language=( self.language_to_service_language(params.language) if params.language else "en-IN" ), - "enable_preprocessing": ( + enable_preprocessing=( True if self._config.preprocessing_always_enabled else params.enable_preprocessing ), - "pace": pace, - "model": model, - } + pace=pace, + model=model, + ) # Add parameters based on model support if self._config.supports_pitch: - self._settings["pitch"] = params.pitch + self._settings.pitch = params.pitch elif params.pitch != 0.0: logger.warning(f"pitch parameter is ignored for {model}") if self._config.supports_loudness: - self._settings["loudness"] = params.loudness + self._settings.loudness = params.loudness elif params.loudness != 1.0: logger.warning(f"loudness parameter is ignored for {model}") if self._config.supports_temperature: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature elif params.temperature != 0.6: logger.warning(f"temperature parameter is ignored for {model}") self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -459,7 +534,7 @@ class SarvamHttpTTSService(TTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.sarvam_sample_rate = self.sample_rate @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -480,21 +555,25 @@ class SarvamHttpTTSService(TTSService): # Build payload with common parameters payload = { "text": text, - "target_language_code": self._settings["language"], + "target_language_code": self._settings.language, "speaker": self._voice_id, "sample_rate": self.sample_rate, - "enable_preprocessing": self._settings["enable_preprocessing"], + "enable_preprocessing": self._settings.enable_preprocessing, "model": self._model_name, - "pace": self._settings.get("pace", 1.0), + "pace": self._settings.pace if is_given(self._settings.pace) else 1.0, } # Add model-specific parameters based on config if self._config.supports_pitch: - payload["pitch"] = self._settings.get("pitch", 0.0) + payload["pitch"] = self._settings.pitch if is_given(self._settings.pitch) else 0.0 if self._config.supports_loudness: - payload["loudness"] = self._settings.get("loudness", 1.0) + payload["loudness"] = ( + self._settings.loudness if is_given(self._settings.loudness) else 1.0 + ) if self._config.supports_temperature: - payload["temperature"] = self._settings.get("temperature", 0.6) + payload["temperature"] = ( + self._settings.temperature if is_given(self._settings.temperature) else 0.6 + ) headers = { "api-subscription-key": self._api_key, @@ -748,7 +827,7 @@ class SarvamTTSService(InterruptibleTTSService): self._websocket_url = f"{url}?model={model}" self._api_key = api_key self.set_model_name(model) - self.set_voice(voice_id) + self._voice_id = voice_id # Validate and clamp pace to model's valid range pace = params.pace @@ -758,36 +837,36 @@ class SarvamTTSService(InterruptibleTTSService): pace = max(pace_min, min(pace_max, pace)) # Build base settings - self._settings = { - "target_language_code": ( + self._settings: SarvamWSTTSSettings = 
SarvamWSTTSSettings( + target_language_code=( self.language_to_service_language(params.language) if params.language else "en-IN" ), - "speaker": voice_id, - "speech_sample_rate": str(sample_rate), - "enable_preprocessing": ( + speaker=voice_id, + speech_sample_rate=str(sample_rate), + enable_preprocessing=( True if self._config.preprocessing_always_enabled else params.enable_preprocessing ), - "min_buffer_size": params.min_buffer_size, - "max_chunk_length": params.max_chunk_length, - "output_audio_codec": params.output_audio_codec, - "output_audio_bitrate": params.output_audio_bitrate, - "pace": pace, - "model": model, - } + min_buffer_size=params.min_buffer_size, + max_chunk_length=params.max_chunk_length, + output_audio_codec=params.output_audio_codec, + output_audio_bitrate=params.output_audio_bitrate, + pace=pace, + model=model, + ) # Add parameters based on model support if self._config.supports_pitch: - self._settings["pitch"] = params.pitch + self._settings.pitch = params.pitch elif params.pitch != 0.0: logger.warning(f"pitch parameter is ignored for {model}") if self._config.supports_loudness: - self._settings["loudness"] = params.loudness + self._settings.loudness = params.loudness elif params.loudness != 1.0: logger.warning(f"loudness parameter is ignored for {model}") if self._config.supports_temperature: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature elif params.temperature != 0.6: logger.warning(f"temperature parameter is ignored for {model}") @@ -823,7 +902,7 @@ class SarvamTTSService(InterruptibleTTSService): await super().start(frame) # WebSocket API expects sample rate as string - self._settings["speech_sample_rate"] = str(self.sample_rate) + self._settings.speech_sample_rate = str(self.sample_rate) await self._connect() async def stop(self, frame: EndFrame): @@ -870,13 +949,12 @@ class SarvamTTSService(InterruptibleTTSService): if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await 
self.flush_audio() - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice changed.""" - prev_voice = self._voice_id - await super()._update_settings(settings) - if not prev_voice == self._voice_id: - logger.info(f"Switching TTS voice to: [{self._voice_id}]") + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed settings update and resend config if voice changed.""" + changed = await super()._update_settings_from_typed(update) + if "voice" in changed: await self._send_config() + return changed async def _connect(self): """Connect to Sarvam WebSocket and start background tasks.""" @@ -934,9 +1012,28 @@ class SarvamTTSService(InterruptibleTTSService): """Send initial configuration message.""" if not self._websocket: raise Exception("WebSocket not connected") - self._settings["speaker"] = self._voice_id - logger.debug(f"Config being sent is {self._settings}") - config_message = {"type": "config", "data": self._settings} + self._settings.speaker = self._voice_id + # Build config dict for the API + config_data = { + "target_language_code": self._settings.target_language_code, + "speaker": self._settings.speaker, + "speech_sample_rate": self._settings.speech_sample_rate, + "enable_preprocessing": self._settings.enable_preprocessing, + "min_buffer_size": self._settings.min_buffer_size, + "max_chunk_length": self._settings.max_chunk_length, + "output_audio_codec": self._settings.output_audio_codec, + "output_audio_bitrate": self._settings.output_audio_bitrate, + "pace": self._settings.pace, + "model": self._settings.model, + } + if is_given(self._settings.pitch): + config_data["pitch"] = self._settings.pitch + if is_given(self._settings.loudness): + config_data["loudness"] = self._settings.loudness + if is_given(self._settings.temperature): + config_data["temperature"] = self._settings.temperature + logger.debug(f"Config being sent is {config_data}") + config_message = 
{"type": "config", "data": config_data} try: await self._websocket.send(json.dumps(config_message)) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py new file mode 100644 index 000000000..fbec5cdf8 --- /dev/null +++ b/src/pipecat/services/settings.py @@ -0,0 +1,297 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Typed settings infrastructure for Pipecat AI services. + +This module provides typed dataclass-based settings objects that replace the +stringly-typed ``Mapping[str, Any]`` dictionaries previously used for service +configuration. Each service type has a corresponding settings class (e.g. +``TTSSettings``, ``LLMSettings``) whose fields use the ``NOT_GIVEN`` sentinel +to distinguish "leave unchanged" from an explicit ``None``. + +Key concepts: + +- **NOT_GIVEN sentinel**: A value meaning "this field was not provided in the + update". Distinct from ``None`` (which may be a valid value for a setting). +- **Settings as both state and delta**: The same class is used for the + service's current settings *and* for update objects. Fields set to + ``NOT_GIVEN`` are simply skipped when applying an update. +- **apply_update**: Applies a delta onto a target settings object and returns + the set of field names that actually changed. +- **from_mapping**: Constructs a typed settings object from a plain dict, + supporting field aliases (e.g. ``"voice_id"`` → ``"voice"``). +- **Extras**: Unknown keys land in the ``extra`` dict so services that have + non-standard settings don't lose data. 
+""" + +from __future__ import annotations + +import copy +from dataclasses import dataclass, field, fields +from typing import Any, ClassVar, Dict, Mapping, Optional, Set, Type, TypeVar + +from loguru import logger + +# --------------------------------------------------------------------------- +# NOT_GIVEN sentinel +# --------------------------------------------------------------------------- + + +class _NotGiven: + """Sentinel indicating a settings field was not provided. + + ``NOT_GIVEN`` means "the caller did not supply this value" — distinct from + ``None``, which may be a legitimate setting value. It is used as the + default for every settings field so that ``apply_update`` can tell which + fields the caller actually wants to change. + """ + + _instance: Optional[_NotGiven] = None + + def __new__(cls) -> _NotGiven: + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __repr__(self) -> str: + return "NOT_GIVEN" + + def __bool__(self) -> bool: + return False + + +NOT_GIVEN: _NotGiven = _NotGiven() +"""Singleton sentinel meaning "this field was not included in the update".""" + + +def is_given(value: Any) -> bool: + """Check whether a value was explicitly provided (i.e. is not ``NOT_GIVEN``). + + Args: + value: The value to check. + + Returns: + ``True`` if *value* is anything other than ``NOT_GIVEN``. + """ + return not isinstance(value, _NotGiven) + + +# --------------------------------------------------------------------------- +# Base ServiceSettings +# --------------------------------------------------------------------------- + +_S = TypeVar("_S", bound="ServiceSettings") + + +@dataclass +class ServiceSettings: + """Base class for typed service settings. + + Every AI service type (LLM, TTS, STT) extends this with its own fields. + Fields default to ``NOT_GIVEN`` so that an instance can represent either + the full current state **or** a sparse update delta. 
+ + Parameters: + model: The model identifier used by the service. + extra: Overflow dict for service-specific keys that don't map to a + declared field. + """ + + # -- common fields ------------------------------------------------------- + + model: Any = field(default_factory=lambda: NOT_GIVEN) + """AI model identifier (e.g. ``"gpt-4o"``, ``"eleven_turbo_v2_5"``).""" + + extra: Dict[str, Any] = field(default_factory=dict) + """Catch-all for service-specific keys that have no declared field.""" + + # -- class-level configuration ------------------------------------------- + + _aliases: ClassVar[Dict[str, str]] = {} + """Map of alternative key names to canonical field names. + + For example ``{"voice_id": "voice"}`` lets callers use either spelling. + Subclasses should override this as needed. + """ + + # -- public API ---------------------------------------------------------- + + def given_fields(self) -> Dict[str, Any]: + """Return a dict of only the fields that were explicitly provided. + + Skips ``NOT_GIVEN`` values and the ``extra`` field itself. Entries + from ``extra`` are included at the top level. + + Returns: + Dictionary mapping field names to their provided values. + """ + result: Dict[str, Any] = {} + for f in fields(self): + if f.name == "extra": + continue + val = getattr(self, f.name) + if is_given(val): + result[f.name] = val + result.update(self.extra) + return result + + def apply_update(self: _S, update: _S) -> Set[str]: + """Apply *update* onto this settings object, returning changed field names. + + Only fields in *update* that are **given** (i.e. not ``NOT_GIVEN``) + are considered. A field is "changed" if its new value differs from + the current value. + + The ``extra`` dicts are merged: keys present in the update overwrite + keys in the target. + + Args: + update: A settings object of the same type containing the delta. + + Returns: + The set of field names whose values actually changed. 
+ + Examples:: + + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings(voice="bob") + changed = current.apply_update(delta) + # changed == {"voice"} + # current.voice == "bob", current.language == "en" + """ + changed: Set[str] = set() + for f in fields(self): + if f.name == "extra": + continue + new_val = getattr(update, f.name) + if not is_given(new_val): + continue + old_val = getattr(self, f.name) + if old_val != new_val: + setattr(self, f.name, new_val) + changed.add(f.name) + + # Merge extra + for key, new_val in update.extra.items(): + old_val = self.extra.get(key, NOT_GIVEN) + if old_val != new_val: + self.extra[key] = new_val + changed.add(key) + + return changed + + @classmethod + def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + """Construct a typed settings object from a plain dictionary. + + Keys are matched to dataclass fields by name. Keys listed in + ``_aliases`` are translated to their canonical name first. Any + remaining unrecognized keys are placed into ``extra``. + + Args: + settings: A dictionary of setting names to values. + + Returns: + A new settings instance with the corresponding fields populated. + + Examples:: + + update = TTSSettings.from_mapping({"voice_id": "alice", "speed": 1.2}) + # update.voice == "alice" (via alias) + # update.extra == {"speed": 1.2} + """ + field_names = {f.name for f in fields(cls)} - {"extra"} + kwargs: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + + for key, value in settings.items(): + # Resolve aliases first + canonical = cls._aliases.get(key, key) + if canonical in field_names: + kwargs[canonical] = value + else: + extra[key] = value + + instance = cls(**kwargs) + instance.extra = extra + return instance + + def to_dict(self) -> Dict[str, Any]: + """Serialize to a flat dictionary, including extra. + + Only given (non-``NOT_GIVEN``) values are included. This is the + inverse of ``from_mapping`` and useful for passing settings to APIs + that expect plain dicts. 
+ + Returns: + A flat dictionary of all given settings. + """ + return self.given_fields() + + def copy(self: _S) -> _S: + """Return a deep copy of this settings instance. + + Returns: + A new settings object with the same field values. + """ + return copy.deepcopy(self) + + +# --------------------------------------------------------------------------- +# Service-specific settings +# --------------------------------------------------------------------------- + + +@dataclass +class LLMSettings(ServiceSettings): + """Typed settings for LLM services. + + Parameters: + model: LLM model identifier. + temperature: Sampling temperature. + max_tokens: Maximum tokens to generate. + top_p: Nucleus sampling probability. + top_k: Top-k sampling parameter. + frequency_penalty: Frequency penalty. + presence_penalty: Presence penalty. + seed: Random seed for reproducibility. + """ + + temperature: Any = field(default_factory=lambda: NOT_GIVEN) + max_tokens: Any = field(default_factory=lambda: NOT_GIVEN) + top_p: Any = field(default_factory=lambda: NOT_GIVEN) + top_k: Any = field(default_factory=lambda: NOT_GIVEN) + frequency_penalty: Any = field(default_factory=lambda: NOT_GIVEN) + presence_penalty: Any = field(default_factory=lambda: NOT_GIVEN) + seed: Any = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class TTSSettings(ServiceSettings): + """Typed settings for TTS services. + + Parameters: + model: TTS model identifier. + voice: Voice identifier or name. + language: Language for speech synthesis. + """ + + voice: Any = field(default_factory=lambda: NOT_GIVEN) + language: Any = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + +@dataclass +class STTSettings(ServiceSettings): + """Typed settings for STT services. + + Parameters: + model: STT model identifier. + language: Language for speech recognition. 
+ """ + + language: Any = field(default_factory=lambda: NOT_GIVEN) diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index c9184ba4c..9d732a356 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -8,7 +8,8 @@ import json import time -from typing import AsyncGenerator, List, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, List, Optional from loguru import logger from pydantic import BaseModel @@ -23,6 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import SONIOX_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -134,6 +136,17 @@ def _prepare_language_hints( return list(set(prepared_languages)) +@dataclass +class SonioxSTTSettings(STTSettings): + """Typed settings for Soniox STT service. + + Parameters: + input_params: Soniox ``SonioxInputParams`` for detailed configuration. + """ + + input_params: SonioxInputParams = field(default_factory=lambda: NOT_GIVEN) + + class SonioxSTTService(WebsocketSTTService): """Speech-to-Text service using Soniox's WebSocket API. 
@@ -181,9 +194,13 @@ class SonioxSTTService(WebsocketSTTService): self._api_key = api_key self._url = url self.set_model_name(params.model) - self._params = params self._vad_force_turn_endpoint = vad_force_turn_endpoint + self._settings = SonioxSTTSettings( + model=params.model, + input_params=params, + ) + self._final_transcription_buffer = [] self._last_tokens_received: Optional[float] = None @@ -198,6 +215,43 @@ class SonioxSTTService(WebsocketSTTService): await super().start(frame) await self._connect() + async def _update_settings_from_typed(self, update: SonioxSTTSettings) -> set[str]: + """Apply a typed settings update, keeping ``input_params`` in sync. + + Top-level ``model`` is the source of truth. When it is given in + *update* its value is propagated into ``input_params``. When only + ``input_params`` is given, its ``model`` is propagated *up* to the + top-level field. + + Any change triggers a WebSocket reconnect. + + Args: + update: A typed settings delta. + + Returns: + Set of field names whose values actually changed. + """ + model_given = is_given(getattr(update, "model", NOT_GIVEN)) + + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + # --- Sync model -------------------------------------------------- + if model_given: + # Top-level model wins → push into input_params. + self._settings.input_params.model = self._settings.model + elif "input_params" in changed and self._settings.input_params.model is not None: + # Only input_params was given → pull model up. + self._settings.model = self._settings.input_params.model + self.set_model_name(self._settings.model) + + await self._disconnect() + await self._connect() + + return changed + async def stop(self, frame: EndFrame): """Stop the Soniox STT websocket connection. @@ -311,7 +365,9 @@ class SonioxSTTService(WebsocketSTTService): # Either one or the other is required. 
enable_endpoint_detection = not self._vad_force_turn_endpoint - context = self._params.context + params = self._settings.input_params + + context = params.context if isinstance(context, SonioxContextObject): context = context.model_dump() @@ -319,16 +375,16 @@ class SonioxSTTService(WebsocketSTTService): config = { "api_key": self._api_key, "model": self._model_name, - "audio_format": self._params.audio_format, - "num_channels": self._params.num_channels or 1, + "audio_format": params.audio_format, + "num_channels": params.num_channels or 1, "enable_endpoint_detection": enable_endpoint_detection, "sample_rate": self.sample_rate, - "language_hints": _prepare_language_hints(self._params.language_hints), - "language_hints_strict": self._params.language_hints_strict, + "language_hints": _prepare_language_hints(params.language_hints), + "language_hints_strict": params.language_hints_strict, "context": context, - "enable_speaker_diarization": self._params.enable_speaker_diarization, - "enable_language_identification": self._params.enable_language_identification, - "client_reference_id": self._params.client_reference_id, + "enable_speaker_diarization": params.enable_speaker_diarization, + "enable_language_identification": params.enable_language_identification, + "client_reference_id": params.client_reference_id, } # Send the configuration message. 
diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index ca949a9fd..d04bb564d 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -8,8 +8,10 @@ import asyncio import os +import warnings +from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator +from typing import Any, AsyncGenerator, ClassVar from dotenv import load_dotenv from loguru import logger @@ -31,6 +33,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given from pipecat.services.stt_latency import SPEECHMATICS_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -80,6 +83,81 @@ class TurnDetectionMode(str, Enum): SMART_TURN = "smart_turn" +@dataclass +class SpeechmaticsSTTSettings(STTSettings): + """Typed settings for Speechmatics STT service. + + See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field. + + Parameters: + model: The operating point / model name. + domain: Domain for Speechmatics API. + turn_detection_mode: Endpoint handling mode. + speaker_active_format: Formatter for active speaker ID. + speaker_passive_format: Formatter for passive speaker ID. + focus_speakers: List of speaker IDs to focus on. + ignore_speakers: List of speaker IDs to ignore. + focus_mode: Speaker focus mode for diarization. + known_speakers: List of known speaker labels and identifiers. + additional_vocab: List of additional vocabulary entries. + audio_encoding: Audio encoding format. + operating_point: Operating point for accuracy vs. latency. + max_delay: Maximum delay in seconds for transcription. + end_of_utterance_silence_trigger: Maximum delay for end of utterance trigger. 
+ end_of_utterance_max_delay: Maximum delay for end of utterance. + punctuation_overrides: Punctuation overrides. + include_partials: Include partial segment fragments. + split_sentences: Emit finalized sentences mid-turn. + enable_diarization: Enable speaker diarization. + speaker_sensitivity: Diarization sensitivity. + max_speakers: Maximum number of speakers to detect. + prefer_current_speaker: Prefer current speaker ID. + extra_params: Extra parameters for the STT engine. + """ + + domain: str = field(default_factory=lambda: NOT_GIVEN) + turn_detection_mode: TurnDetectionMode = field(default_factory=lambda: NOT_GIVEN) + speaker_active_format: str = field(default_factory=lambda: NOT_GIVEN) + speaker_passive_format: str = field(default_factory=lambda: NOT_GIVEN) + focus_speakers: list = field(default_factory=lambda: NOT_GIVEN) + ignore_speakers: list = field(default_factory=lambda: NOT_GIVEN) + focus_mode: Any = field(default_factory=lambda: NOT_GIVEN) + known_speakers: list = field(default_factory=lambda: NOT_GIVEN) + additional_vocab: list = field(default_factory=lambda: NOT_GIVEN) + audio_encoding: Any = field(default_factory=lambda: NOT_GIVEN) + operating_point: Any = field(default_factory=lambda: NOT_GIVEN) + max_delay: float = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_silence_trigger: float = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_max_delay: float = field(default_factory=lambda: NOT_GIVEN) + punctuation_overrides: dict = field(default_factory=lambda: NOT_GIVEN) + include_partials: bool = field(default_factory=lambda: NOT_GIVEN) + split_sentences: bool = field(default_factory=lambda: NOT_GIVEN) + enable_diarization: bool = field(default_factory=lambda: NOT_GIVEN) + speaker_sensitivity: float = field(default_factory=lambda: NOT_GIVEN) + max_speakers: int = field(default_factory=lambda: NOT_GIVEN) + prefer_current_speaker: bool = field(default_factory=lambda: NOT_GIVEN) + extra_params: dict = field(default_factory=lambda: 
NOT_GIVEN) + + #: Fields that can be updated on a live connection via the Speechmatics + #: diarization-config API — no reconnect needed. + HOT_FIELDS: ClassVar[frozenset[str]] = frozenset( + { + "focus_speakers", + "ignore_speakers", + "focus_mode", + } + ) + + #: Fields that are purely local (formatting templates) — no reconnect + #: and no API call needed. + LOCAL_FIELDS: ClassVar[frozenset[str]] = frozenset( + { + "speaker_active_format", + "speaker_passive_format", + } + ) + + class SpeechmaticsSTTService(STTService): """Speechmatics STT service implementation. @@ -327,30 +405,56 @@ class SpeechmaticsSTTService(STTService): # Deprecation check self._check_deprecated_args(kwargs, params) - # Voice agent + # Output formatting defaults + speaker_active_format = params.speaker_active_format + if speaker_active_format is None: + speaker_active_format = ( + "@{speaker_id}: {text}" if params.enable_diarization else "{text}" + ) + speaker_passive_format = params.speaker_passive_format or speaker_active_format + + # Typed settings — seeded from InputParams + self._settings = SpeechmaticsSTTSettings( + language=params.language, + domain=params.domain, + turn_detection_mode=params.turn_detection_mode, + speaker_active_format=speaker_active_format, + speaker_passive_format=speaker_passive_format, + focus_speakers=params.focus_speakers, + ignore_speakers=params.ignore_speakers, + focus_mode=params.focus_mode, + known_speakers=params.known_speakers, + additional_vocab=params.additional_vocab, + audio_encoding=params.audio_encoding, + operating_point=params.operating_point, + max_delay=params.max_delay, + end_of_utterance_silence_trigger=params.end_of_utterance_silence_trigger, + end_of_utterance_max_delay=params.end_of_utterance_max_delay, + punctuation_overrides=params.punctuation_overrides, + include_partials=params.include_partials, + split_sentences=params.split_sentences, + enable_diarization=params.enable_diarization, + speaker_sensitivity=params.speaker_sensitivity, 
+ max_speakers=params.max_speakers, + prefer_current_speaker=params.prefer_current_speaker, + extra_params=params.extra_params, + ) + + # Build SDK config from settings self._client: VoiceAgentClient | None = None - self._config: VoiceAgentConfig = self._prepare_config(params) + self._config: VoiceAgentConfig = self._build_config() # Outbound frame queue self._outbound_frames: asyncio.Queue[Frame] = asyncio.Queue() - # Output formatting - if params.speaker_active_format is None: - params.speaker_active_format = ( - "@{speaker_id}: {text}" if params.enable_diarization else "{text}" - ) - # Framework options self._enable_vad: bool = self._config.end_of_utterance_mode not in [ EndOfUtteranceMode.FIXED, EndOfUtteranceMode.EXTERNAL, ] - self._speaker_active_format: str = params.speaker_active_format - self._speaker_passive_format: str = ( - params.speaker_passive_format or params.speaker_active_format - ) - # Model + metrics + # Model + metrics (operating_point comes from the SDK config/preset) + self._settings.model = self._config.operating_point.value self.set_model_name(self._config.operating_point.value) # Message queue @@ -374,6 +478,56 @@ class SpeechmaticsSTTService(STTService): await super().start(frame) await self._connect() + async def _update_settings_from_typed(self, update: SpeechmaticsSTTSettings) -> set[str]: + """Apply typed settings update, reconnecting only when necessary. + + Fields are classified into three categories (see + ``SpeechmaticsSTTSettings``): + + * **HOT_FIELDS** – diarization speaker settings that can be pushed + to a live Speechmatics connection without reconnecting. + * **LOCAL_FIELDS** – formatting templates evaluated locally; no + reconnect or API call needed. + * Everything else – baked into ``VoiceAgentConfig`` at connection + time and therefore require a full disconnect / reconnect. + + Args: + update: A typed settings delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + changed = await super()._update_settings_from_typed(update) + + if not changed: + return changed + + no_reconnect = SpeechmaticsSTTSettings.HOT_FIELDS | SpeechmaticsSTTSettings.LOCAL_FIELDS + needs_reconnect = bool(changed - no_reconnect) + + if needs_reconnect: + # Connection-level fields changed — rebuild the SDK config + # from the now-updated self._settings, then reconnect. + self._config = self._build_config() + await self._disconnect() + await self._connect() + elif changed & SpeechmaticsSTTSettings.HOT_FIELDS: + if self._config.enable_diarization: + # Only hot-updatable fields changed — push to the live session. + self._config.speaker_config.focus_speakers = self._settings.focus_speakers + self._config.speaker_config.ignore_speakers = self._settings.ignore_speakers + self._config.speaker_config.focus_mode = self._settings.focus_mode + if self._client: + self._client.update_diarization_config(self._config.speaker_config) + else: + # Diarization not enabled — need a full reconnect to apply. + self._config = self._build_config() + await self._disconnect() + await self._connect() + # LOCAL_FIELDS: already applied by super(); nothing else to do. + + return changed + async def stop(self, frame: EndFrame): """Called when the session ends.""" await super().stop(frame) @@ -484,28 +638,35 @@ class SpeechmaticsSTTService(STTService): # CONFIGURATION # ============================================================================ - def _prepare_config(self, params: InputParams) -> VoiceAgentConfig: - """Parse the InputParams into VoiceAgentConfig.""" - # Preset - config = VoiceAgentConfigPreset.load(params.turn_detection_mode.value) + def _build_config(self) -> VoiceAgentConfig: + """Build a ``VoiceAgentConfig`` from the current ``self._settings``. + + Used both at init time and before reconnecting so the connection + always reflects the latest settings. 
+ """ + s = self._settings + + # Preset from turn detection mode + config = VoiceAgentConfigPreset.load(s.turn_detection_mode.value) # Language + domain - config.language = self._language_to_speechmatics_language(params.language) - config.domain = params.domain - config.output_locale = self._locale_to_speechmatics_locale(config.language, params.language) + language = s.language + config.language = self._language_to_speechmatics_language(language) + config.domain = s.domain if is_given(s.domain) else None + config.output_locale = self._locale_to_speechmatics_locale(config.language, language) # Speaker config config.speaker_config = SpeakerFocusConfig( - focus_speakers=params.focus_speakers, - ignore_speakers=params.ignore_speakers, - focus_mode=params.focus_mode, + focus_speakers=s.focus_speakers if is_given(s.focus_speakers) else [], + ignore_speakers=s.ignore_speakers if is_given(s.ignore_speakers) else [], + focus_mode=s.focus_mode if is_given(s.focus_mode) else SpeakerFocusMode.RETAIN, ) - config.known_speakers = params.known_speakers + config.known_speakers = s.known_speakers if is_given(s.known_speakers) else [] # Custom dictionary - config.additional_vocab = params.additional_vocab + config.additional_vocab = s.additional_vocab if is_given(s.additional_vocab) else [] - # Advanced parameters + # Advanced parameters — only set if given (not NOT_GIVEN or None) for param in [ "operating_point", "max_delay", @@ -519,21 +680,20 @@ class SpeechmaticsSTTService(STTService): "max_speakers", "prefer_current_speaker", ]: - if getattr(params, param) is not None: - setattr(config, param, getattr(params, param)) + val = getattr(s, param) + if is_given(val) and val is not None: + setattr(config, param, val) # Extra parameters - if isinstance(params.extra_params, dict): - for key, value in params.extra_params.items(): + if is_given(s.extra_params) and isinstance(s.extra_params, dict): + for key, value in s.extra_params.items(): if hasattr(config, key): setattr(config, key, 
value) # Enable sentences - config.speech_segment_config = SpeechSegmentConfig( - emit_sentences=params.split_sentences or False - ) + split = s.split_sentences if is_given(s.split_sentences) else False + config.speech_segment_config = SpeechSegmentConfig(emit_sentences=split or False) - # Return the complete config return config def update_params( @@ -542,12 +702,23 @@ class SpeechmaticsSTTService(STTService): ) -> None: """Updates the speaker configuration. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with + ``SpeechmaticsSTTSettings(...)`` instead. + This can update the speakers to listen to or ignore during an in-flight transcription. Only available if diarization is enabled. Args: params: Update parameters for the service. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "update_params() is deprecated. Use STTUpdateSettingsFrame with " + "SpeechmaticsSTTSettings(...) instead.", + DeprecationWarning, + ) # Check possible if not self._config.enable_diarization: raise ValueError("Diarization is not enabled") @@ -717,9 +888,9 @@ class SpeechmaticsSTTService(STTService): def attr_from_segment(segment: dict[str, Any]) -> dict[str, Any]: # Formats the output text based on the speaker and defined formats from the config. 
text = ( - self._speaker_active_format + self._settings.speaker_active_format if segment.get("is_active", True) - else self._speaker_passive_format + else self._settings.speaker_passive_format ).format( **{ "speaker_id": segment.get("speaker_id", "UU"), diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 0f3ff0cb6..0907b4e26 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -95,7 +95,7 @@ class SpeechmaticsTTSService(TTSService): self._params = params or SpeechmaticsTTSService.InputParams() # Set voice from constructor parameter - self.set_voice(voice_id) + self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index b556bb23a..d4e5f4cb5 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -34,6 +34,7 @@ from pipecat.frames.frames import ( from pipecat.metrics.metrics import TTFBMetricsData from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import ServiceSettings, STTSettings from pipecat.services.stt_latency import DEFAULT_TTFS_P99 from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language @@ -101,7 +102,6 @@ class STTService(AIService): self._audio_passthrough = audio_passthrough self._init_sample_rate = sample_rate self._sample_rate = 0 - self._settings: Dict[str, Any] = {} self._tracing_enabled: bool = False self._muted: bool = False self._user_id: str = "" @@ -166,18 +166,36 @@ class STTService(AIService): async def set_model(self, model: str): """Set the speech recognition model. 
+ When the service has been migrated to typed settings this routes + through :meth:`_update_settings_from_typed` so that concrete + services can react (e.g. reconnect) in a single place. + Args: model: The name of the model to use for speech recognition. """ - self.set_model_name(model) + logger.info(f"Switching STT model to: [{model}]") + if isinstance(self._settings, ServiceSettings): + settings_cls = type(self._settings) + await self._update_settings_from_typed(settings_cls(model=model)) + else: + self.set_model_name(model) async def set_language(self, language: Language): """Set the language for speech recognition. + When the service has been migrated to typed settings this routes + through :meth:`_update_settings_from_typed` so that concrete + services can react (e.g. reconnect) in a single place. + Args: language: The language to use for speech recognition. """ - pass + logger.info(f"Switching STT language to: [{language}]") + if isinstance(self._settings, ServiceSettings): + settings_cls = type(self._settings) + await self._update_settings_from_typed(settings_cls(language=language)) + else: + pass @abstractmethod async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: @@ -224,6 +242,23 @@ class STTService(AIService): else: logger.warning(f"Unknown setting for STT service: {key}") + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed STT settings update. + + Handles ``model`` (via parent). Does **not** call ``set_language`` + — concrete services should override this method and handle language + changes (including any reconnect logic) based on the returned + changed-field set. + + Args: + update: A typed STT settings delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + changed = await super()._update_settings_from_typed(update) + return changed + async def process_audio_frame(self, frame: AudioRawFrame, direction: FrameDirection): """Process an audio frame for speech recognition. @@ -285,7 +320,16 @@ class STTService(AIService): await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, STTUpdateSettingsFrame): - await self._update_settings(frame.settings) + # New path: typed settings update object. + if frame.update is not None: + await self._update_settings_from_typed(frame.update) + # Legacy path: plain dict, but service uses typed settings — convert. + elif isinstance(self._settings, ServiceSettings): + update = type(self._settings).from_mapping(frame.settings) + await self._update_settings_from_typed(update) + # Legacy path: plain dict, service still uses dict-based settings. + else: + await self._update_settings(frame.settings) elif isinstance(frame, STTMuteFrame): self._muted = frame.mute logger.debug(f"STT service {'muted' if frame.mute else 'unmuted'}") diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 239d2398b..4196e7872 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -52,6 +52,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import ServiceSettings, TTSSettings, is_given from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -189,7 +190,6 @@ class TTSService(AIService): self._init_sample_rate = sample_rate self._sample_rate = 0 self._voice_id: str = "" - self._settings: Dict[str, Any] = {} self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: import 
warnings @@ -263,18 +263,40 @@ class TTSService(AIService): async def set_model(self, model: str): """Set the TTS model to use. + When the service has been migrated to typed settings this routes + through :meth:`_update_settings_from_typed` so that concrete + services can react (e.g. reconnect) in a single place. + Args: model: The name of the TTS model. """ - self.set_model_name(model) + logger.info(f"Switching TTS model to: [{model}]") + if isinstance(self._settings, ServiceSettings): + settings_cls = type(self._settings) + await self._update_settings_from_typed(settings_cls(model=model)) + else: + self.set_model_name(model) - def set_voice(self, voice: str): + async def set_voice(self, voice: str): """Set the voice for speech synthesis. + When the service has been migrated to typed settings this routes + through :meth:`_update_settings_from_typed` so that concrete + services can react (e.g. reconnect) in a single place. + + .. versionchanged:: 0.0.103 + Now ``async``. In ``__init__`` methods, set + ``self._voice_id`` directly instead of calling this method. + Args: voice: The voice identifier or name. """ - self._voice_id = voice + logger.info(f"Switching TTS voice to: [{voice}]") + if isinstance(self._settings, ServiceSettings): + settings_cls = type(self._settings) + await self._update_settings_from_typed(settings_cls(voice=voice)) + else: + self._voice_id = voice def create_context_id(self) -> str: """Generate a unique context ID for a TTS request. @@ -416,13 +438,42 @@ class TTSService(AIService): elif key == "model": self.set_model_name(value) elif key == "voice" or key == "voice_id": - self.set_voice(value) + self._voice_id = value elif key == "text_filter": for filter in self._text_filters: await filter.update_settings(value) else: logger.warning(f"Unknown setting for TTS service: {key}") + async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + """Apply a typed TTS settings update. 
+ + Handles ``model`` (via parent) and syncs ``_voice_id`` when voice + changes. Translates language values before applying. Does **not** + call ``set_voice`` or ``set_model`` directly — concrete services + should override this method and handle reconnect logic based on the + returned changed-field set. + + Args: + update: A typed TTS settings delta. + + Returns: + Set of field names whose values actually changed. + """ + # Translate language *before* applying so the stored value is canonical + if is_given(update.language) and update.language is not None: + converted = self.language_to_service_language(update.language) + if converted is not None: + update.language = converted + + changed = await super()._update_settings_from_typed(update) + + # Keep _voice_id in sync for code that reads it directly + if "voice" in changed and isinstance(self._settings, TTSSettings): + self._voice_id = self._settings.voice + + return changed + async def say(self, text: str): """Immediately speak the provided text. @@ -504,7 +555,16 @@ class TTSService(AIService): await self.flush_audio() self._processing_text = processing_text elif isinstance(frame, TTSUpdateSettingsFrame): - await self._update_settings(frame.settings) + # New path: typed settings update object. + if frame.update is not None: + await self._update_settings_from_typed(frame.update) + # Legacy path: plain dict, but service uses typed settings — convert. + elif isinstance(self._settings, ServiceSettings): + update = type(self._settings).from_mapping(frame.settings) + await self._update_settings_from_typed(update) + # Legacy path: plain dict, service still uses dict-based settings. 
+ else: + await self._update_settings(frame.settings) elif isinstance(frame, BotStoppedSpeakingFrame): await self._maybe_resume_frame_processing() await self.push_frame(frame, direction) diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index d549b11e5..9f0658486 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -15,6 +15,7 @@ import asyncio import datetime import json import uuid +from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional, Union import aiohttp @@ -34,7 +35,6 @@ from pipecat.frames.frames import ( LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, - LLMUpdateSettingsFrame, StartFrame, TranscriptionFrame, TTSAudioRawFrame, @@ -56,6 +56,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings from pipecat.utils.time import time_now_iso8601 try: @@ -66,6 +67,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class UltravoxRealtimeLLMSettings(LLMSettings): + """Settings for UltravoxRealtimeLLMService. + + Parameters: + output_medium: The output medium for the model ("voice" or "text"). + """ + + output_medium: str = field(default=NOT_GIVEN) + + class AgentInputParams(BaseModel): """Input parameters for Ultravox Realtime generation using a pre-defined Agent. @@ -163,6 +175,7 @@ class UltravoxRealtimeLLMService(LLMService): **kwargs: Additional arguments passed to parent LLMService. 
""" super().__init__(**kwargs) + self._settings = UltravoxRealtimeLLMSettings() self._params = params if one_shot_selected_tools: if not isinstance(self._params, OneShotInputParams): @@ -310,6 +323,12 @@ class UltravoxRealtimeLLMService(LLMService): await self.cancel_task(self._receive_task, timeout=1.0) self._receive_task = None + async def _update_settings_from_typed(self, update: UltravoxRealtimeLLMSettings): + changed = await super()._update_settings_from_typed(update) + if "output_medium" in changed: + await self._update_output_medium(self._settings.output_medium) + return changed + # # frame processing # StartFrame, StopFrame, CancelFrame implemented in base class @@ -331,9 +350,6 @@ class UltravoxRealtimeLLMService(LLMService): else LLMContext.from_openai_context(frame.context) ) await self._handle_context(context) - elif isinstance(frame, LLMUpdateSettingsFrame): - if "output_medium" in frame.settings: - await self._update_output_medium(frame.settings.get("output_medium")) elif isinstance(frame, InputTextRawFrame): await self._send_user_text(frame.text) await self.push_frame(frame, direction) diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index bc999dba4..2a02c6ce7 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -10,6 +10,7 @@ This module provides common functionality for services implementing the Whisper interface, including language mapping, metrics generation, and error handling. 
""" +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -17,6 +18,7 @@ from openai import AsyncOpenAI from openai.types.audio import Transcription from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import WHISPER_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -24,6 +26,22 @@ from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_stt +@dataclass +class BaseWhisperSTTSettings(STTSettings): + """Typed settings for Whisper API-based STT services. + + Parameters: + base_url: API base URL. + prompt: Optional text to guide the model's style or continue + a previous segment. + temperature: Sampling temperature between 0 and 1. + """ + + base_url: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + temperature: Optional[float] = field(default_factory=lambda: NOT_GIVEN) + + def language_to_whisper_language(language: Language) -> Optional[str]: """Maps pipecat Language enum to Whisper API language codes. 
@@ -143,26 +161,36 @@ class BaseWhisperSTTService(SegmentedSTTService): self._temperature = temperature self._include_prob_metrics = include_prob_metrics - self._settings = { - "base_url": base_url, - "language": self._language, - "prompt": self._prompt, - "temperature": self._temperature, - } + self._settings: BaseWhisperSTTSettings = BaseWhisperSTTSettings( + model=model, + language=self._language, + base_url=base_url, + prompt=self._prompt, + temperature=self._temperature, + ) def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) - async def set_model(self, model: str): - """Set the model name for transcription. + async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: + """Apply a typed settings update, syncing instance variables. - Args: - model: The name of the model to use. + Keeps ``_language``, ``_prompt``, and ``_temperature`` in sync with + the typed settings fields. """ - self.set_model_name(model) + changed = await super()._update_settings_from_typed(update) + + if "language" in changed: + self._language = self.language_to_service_language(Language(self._settings.language)) + if "prompt" in changed: + self._prompt = self._settings.prompt + if "temperature" in changed: + self._temperature = self._settings.temperature + + return changed def can_generate_metrics(self) -> bool: - """Indicates whether this service can generate metrics. + """Whether this service can generate processing metrics. Returns: bool: True, as this service supports metric generation. @@ -180,15 +208,6 @@ class BaseWhisperSTTService(SegmentedSTTService): """ return language_to_whisper_language(language) - async def set_language(self, language: Language): - """Set the language for transcription. - - Args: - language: The Language enum value to use for transcription. 
- """ - logger.info(f"Switching STT language to: [{language}]") - self._language = self.language_to_service_language(language) - @traced_stt async def _handle_transcription( self, transcript: str, is_final: bool, language: Optional[Language] = None diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index f11978cc2..30451e6d0 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -11,6 +11,7 @@ supporting both Faster Whisper and MLX Whisper backends for efficient inference. """ import asyncio +from dataclasses import dataclass, field from enum import Enum from typing import AsyncGenerator, Optional @@ -19,6 +20,7 @@ from loguru import logger from typing_extensions import TYPE_CHECKING, override from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.time import time_now_iso8601 @@ -172,6 +174,36 @@ def language_to_whisper_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class WhisperSTTSettings(STTSettings): + """Typed settings for the local Whisper (Faster Whisper) STT service. + + Parameters: + device: Inference device ('cpu', 'cuda', or 'auto'). + compute_type: Compute type for inference ('default', 'int8', etc.). + no_speech_prob: Probability threshold for filtering non-speech segments. + """ + + device: str = field(default_factory=lambda: NOT_GIVEN) + compute_type: str = field(default_factory=lambda: NOT_GIVEN) + no_speech_prob: float = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class WhisperMLXSTTSettings(STTSettings): + """Typed settings for the MLX Whisper STT service. + + Parameters: + no_speech_prob: Probability threshold for filtering non-speech segments. 
+ temperature: Sampling temperature (0.0-1.0). + engine: Whisper engine identifier. + """ + + no_speech_prob: float = field(default_factory=lambda: NOT_GIVEN) + temperature: float = field(default_factory=lambda: NOT_GIVEN) + engine: str = field(default_factory=lambda: NOT_GIVEN) + + class WhisperSTTService(SegmentedSTTService): """Class to transcribe audio with a locally-downloaded Whisper model. @@ -206,12 +238,13 @@ class WhisperSTTService(SegmentedSTTService): self._no_speech_prob = no_speech_prob self._model: Optional[WhisperModel] = None - self._settings = { - "language": language, - "device": self._device, - "compute_type": self._compute_type, - "no_speech_prob": self._no_speech_prob, - } + self._settings: WhisperSTTSettings = WhisperSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + device=self._device, + compute_type=self._compute_type, + no_speech_prob=self._no_speech_prob, + ) self._load() @@ -234,15 +267,6 @@ class WhisperSTTService(SegmentedSTTService): """ return language_to_whisper_language(language) - async def set_language(self, language: Language): - """Set the language for transcription. - - Args: - language: The Language enum value to use for transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - def _load(self): """Loads the Whisper model. @@ -293,7 +317,7 @@ class WhisperSTTService(SegmentedSTTService): # Divide by 32768 because we have signed 16-bit data. 
audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings["language"]) + whisper_lang = self.language_to_service_language(self._settings.language) segments, _ = await asyncio.to_thread( self._model.transcribe, audio_float, language=whisper_lang ) @@ -305,13 +329,13 @@ class WhisperSTTService(SegmentedSTTService): await self.stop_processing_metrics() if text: - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, ) @@ -347,12 +371,13 @@ class WhisperSTTServiceMLX(WhisperSTTService): self._no_speech_prob = no_speech_prob self._temperature = temperature - self._settings = { - "language": language, - "no_speech_prob": self._no_speech_prob, - "temperature": self._temperature, - "engine": "mlx", - } + self._settings: WhisperMLXSTTSettings = WhisperMLXSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + no_speech_prob=self._no_speech_prob, + temperature=self._temperature, + engine="mlx", + ) # No need to call _load() as MLX Whisper loads models on demand @@ -390,7 +415,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): # Divide by 32768 because we have signed 16-bit data. 
audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings["language"]) + whisper_lang = self.language_to_service_language(self._settings.language) chunk = await asyncio.to_thread( mlx_whisper.transcribe, audio_float, @@ -413,13 +438,13 @@ class WhisperSTTServiceMLX(WhisperSTTService): await self.stop_processing_metrics() if text: - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, ) except Exception as e: diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index bf4eb4f03..664d3d4be 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -10,7 +10,8 @@ This module provides integration with Coqui XTTS streaming server for text-to-speech synthesis using local Docker deployment. """ -from typing import Any, AsyncGenerator, Dict, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Dict, Optional import aiohttp from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -68,6 +70,17 @@ def language_to_xtts_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class XTTSTTSSettings(TTSSettings): + """Typed settings for XTTS TTS service. + + Parameters: + base_url: Base URL of the XTTS streaming server. 
+ """ + + base_url: str = field(default_factory=lambda: NOT_GIVEN) + + class XTTSService(TTSService): """Coqui XTTS text-to-speech service. @@ -98,11 +111,12 @@ class XTTSService(TTSService): """ super().__init__(sample_rate=sample_rate, **kwargs) - self._settings = { - "language": self.language_to_service_language(language), - "base_url": base_url, - } - self.set_voice(voice_id) + self._settings: XTTSTTSSettings = XTTSTTSSettings( + voice=voice_id, + language=self.language_to_service_language(language), + base_url=base_url, + ) + self._voice_id = voice_id self._studio_speakers: Optional[Dict[str, Any]] = None self._aiohttp_session = aiohttp_session @@ -138,7 +152,7 @@ class XTTSService(TTSService): if self._studio_speakers: return - async with self._aiohttp_session.get(self._settings["base_url"] + "/studio_speakers") as r: + async with self._aiohttp_session.get(self._settings.base_url + "/studio_speakers") as r: if r.status != 200: text = await r.text() await self.push_error( @@ -166,11 +180,11 @@ class XTTSService(TTSService): embeddings = self._studio_speakers[self._voice_id] - url = self._settings["base_url"] + "/tts_stream" + url = self._settings.base_url + "/tts_stream" payload = { "text": text.replace(".", "").replace("*", ""), - "language": self._settings["language"], + "language": self._settings.language, "speaker_embedding": embeddings["speaker_embedding"], "gpt_cond_latent": embeddings["gpt_cond_latent"], "add_wav_header": False, diff --git a/tests/test_settings.py b/tests/test_settings.py new file mode 100644 index 000000000..62583b00b --- /dev/null +++ b/tests/test_settings.py @@ -0,0 +1,308 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Tests for the typed settings infrastructure in pipecat.services.settings.""" + +import pytest + +from pipecat.services.settings import ( + NOT_GIVEN, + LLMSettings, + ServiceSettings, + STTSettings, + TTSSettings, + _NotGiven, + is_given, +) + +# 
--------------------------------------------------------------------------- +# NOT_GIVEN sentinel +# --------------------------------------------------------------------------- + + +class TestNotGiven: + def test_singleton(self): + """NOT_GIVEN is a singleton — every reference is the same object.""" + assert _NotGiven() is _NotGiven() + assert NOT_GIVEN is _NotGiven() + + def test_repr(self): + assert repr(NOT_GIVEN) == "NOT_GIVEN" + + def test_bool_is_false(self): + assert not NOT_GIVEN + assert bool(NOT_GIVEN) is False + + def test_is_given_with_not_given(self): + assert is_given(NOT_GIVEN) is False + + def test_is_given_with_none(self): + assert is_given(None) is True + + def test_is_given_with_values(self): + assert is_given(0) is True + assert is_given("") is True + assert is_given(False) is True + assert is_given(42) is True + assert is_given("hello") is True + + +# --------------------------------------------------------------------------- +# ServiceSettings base +# --------------------------------------------------------------------------- + + +class TestServiceSettings: + def test_default_fields_are_not_given(self): + s = ServiceSettings() + assert not is_given(s.model) + assert s.extra == {} + + def test_given_fields_empty_by_default(self): + s = ServiceSettings() + assert s.given_fields() == {} + + def test_given_fields_includes_set_values(self): + s = ServiceSettings(model="gpt-4o") + assert s.given_fields() == {"model": "gpt-4o"} + + def test_given_fields_includes_extra(self): + s = ServiceSettings(model="gpt-4o") + s.extra = {"custom_key": 42} + result = s.given_fields() + assert result == {"model": "gpt-4o", "custom_key": 42} + + def test_to_dict(self): + s = ServiceSettings(model="gpt-4o") + assert s.to_dict() == {"model": "gpt-4o"} + + def test_copy_is_deep(self): + s = ServiceSettings(model="gpt-4o") + s.extra = {"nested": {"a": 1}} + c = s.copy() + assert c.model == "gpt-4o" + assert c.extra == {"nested": {"a": 1}} + # Mutating the copy shouldn't 
affect the original + c.extra["nested"]["a"] = 999 + assert s.extra["nested"]["a"] == 1 + + +# --------------------------------------------------------------------------- +# apply_update +# --------------------------------------------------------------------------- + + +class TestApplyUpdate: + def test_apply_update_basic(self): + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings(voice="bob") + changed = current.apply_update(delta) + assert changed == {"voice"} + assert current.voice == "bob" + assert current.language == "en" + + def test_apply_update_no_change(self): + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings(voice="alice") + changed = current.apply_update(delta) + assert changed == set() + assert current.voice == "alice" + + def test_apply_update_not_given_skipped(self): + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings() # all NOT_GIVEN + changed = current.apply_update(delta) + assert changed == set() + assert current.voice == "alice" + assert current.language == "en" + + def test_apply_update_multiple_fields(self): + current = LLMSettings(temperature=0.7, max_tokens=100) + delta = LLMSettings(temperature=0.9, max_tokens=200, top_p=0.95) + changed = current.apply_update(delta) + assert changed == {"temperature", "max_tokens", "top_p"} + assert current.temperature == 0.9 + assert current.max_tokens == 200 + assert current.top_p == 0.95 + + def test_apply_update_extra_merged(self): + current = TTSSettings(voice="alice") + current.extra = {"speed": 1.0, "stability": 0.5} + delta = TTSSettings() + delta.extra = {"speed": 1.2} + changed = current.apply_update(delta) + assert "speed" in changed + assert current.extra == {"speed": 1.2, "stability": 0.5} + + def test_apply_update_extra_no_change(self): + current = TTSSettings(voice="alice") + current.extra = {"speed": 1.0} + delta = TTSSettings() + delta.extra = {"speed": 1.0} + changed = current.apply_update(delta) + assert changed == 
set() + + def test_apply_update_model_field(self): + current = ServiceSettings(model="old-model") + delta = ServiceSettings(model="new-model") + changed = current.apply_update(delta) + assert changed == {"model"} + assert current.model == "new-model" + + def test_apply_update_none_is_a_valid_value(self): + """Setting a field to None should be treated as a change from NOT_GIVEN.""" + current = TTSSettings() + delta = TTSSettings(language=None) + changed = current.apply_update(delta) + assert "language" in changed + assert current.language is None + + def test_apply_update_none_to_value(self): + current = TTSSettings(language=None) + delta = TTSSettings(language="en") + changed = current.apply_update(delta) + assert "language" in changed + assert current.language == "en" + + +# --------------------------------------------------------------------------- +# from_mapping +# --------------------------------------------------------------------------- + + +class TestFromMapping: + def test_basic_mapping(self): + s = TTSSettings.from_mapping({"voice": "alice", "language": "en"}) + assert s.voice == "alice" + assert s.language == "en" + assert not is_given(s.model) + + def test_alias_resolution(self): + """'voice_id' is an alias for 'voice' in TTSSettings.""" + s = TTSSettings.from_mapping({"voice_id": "alice"}) + assert s.voice == "alice" + + def test_unknown_keys_go_to_extra(self): + s = TTSSettings.from_mapping({"voice": "alice", "speed": 1.2, "stability": 0.5}) + assert s.voice == "alice" + assert s.extra == {"speed": 1.2, "stability": 0.5} + + def test_model_field(self): + s = LLMSettings.from_mapping({"model": "gpt-4o", "temperature": 0.7}) + assert s.model == "gpt-4o" + assert s.temperature == 0.7 + + def test_empty_mapping(self): + s = ServiceSettings.from_mapping({}) + assert s.given_fields() == {} + + def test_all_unknown_keys(self): + s = ServiceSettings.from_mapping({"foo": 1, "bar": 2}) + assert not is_given(s.model) + assert s.extra == {"foo": 1, "bar": 2} + + 
def test_llm_settings_from_mapping(self): + s = LLMSettings.from_mapping({"temperature": 0.5, "max_tokens": 1000, "custom_param": True}) + assert s.temperature == 0.5 + assert s.max_tokens == 1000 + assert s.extra == {"custom_param": True} + + def test_stt_settings_from_mapping(self): + s = STTSettings.from_mapping({"language": "fr", "model": "whisper-large"}) + assert s.language == "fr" + assert s.model == "whisper-large" + + +# --------------------------------------------------------------------------- +# LLMSettings specifics +# --------------------------------------------------------------------------- + + +class TestLLMSettings: + def test_all_fields_not_given_by_default(self): + s = LLMSettings() + for name in ( + "model", + "temperature", + "max_tokens", + "top_p", + "top_k", + "frequency_penalty", + "presence_penalty", + "seed", + ): + assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_given_fields(self): + s = LLMSettings(temperature=0.7, seed=42) + assert s.given_fields() == {"temperature": 0.7, "seed": 42} + + +# --------------------------------------------------------------------------- +# TTSSettings specifics +# --------------------------------------------------------------------------- + + +class TestTTSSettings: + def test_all_fields_not_given_by_default(self): + s = TTSSettings() + for name in ("model", "voice", "language"): + assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_aliases_class_var(self): + assert TTSSettings._aliases == {"voice_id": "voice"} + + def test_given_fields(self): + s = TTSSettings(voice="alice") + assert s.given_fields() == {"voice": "alice"} + + +# --------------------------------------------------------------------------- +# STTSettings specifics +# --------------------------------------------------------------------------- + + +class TestSTTSettings: + def test_all_fields_not_given_by_default(self): + s = STTSettings() + for name in ("model", "language"): + 
assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_given_fields(self): + s = STTSettings(language="en", model="whisper-large") + assert s.given_fields() == {"language": "en", "model": "whisper-large"} + + +# --------------------------------------------------------------------------- +# Integration: roundtrip from_mapping → apply_update +# --------------------------------------------------------------------------- + + +class TestRoundtrip: + def test_from_mapping_then_apply_update(self): + """Simulate the real flow: dict arrives via frame, gets converted, applied.""" + # Simulating current service state + current = TTSSettings(model="eleven_turbo_v2_5", voice="alice", language="en") + current.extra = {"stability": 0.5, "speed": 1.0} + + # Incoming dict-based update + raw = {"voice_id": "bob", "speed": 1.2} + delta = TTSSettings.from_mapping(raw) + + changed = current.apply_update(delta) + assert changed == {"voice", "speed"} + assert current.voice == "bob" + assert current.language == "en" + assert current.extra["speed"] == 1.2 + assert current.extra["stability"] == 0.5 + + def test_from_mapping_preserves_model(self): + current = LLMSettings(model="gpt-4o", temperature=0.7) + delta = LLMSettings.from_mapping({"model": "gpt-4o-mini", "temperature": 0.9}) + changed = current.apply_update(delta) + assert changed == {"model", "temperature"} + assert current.model == "gpt-4o-mini" + assert current.temperature == 0.9 From 444cbb64991c0bb67a42df46320b0919dee16fd6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 10:31:25 -0500 Subject: [PATCH 002/189] Add turn-completion fields to LLMSettings and handle them in the typed-service-settings path. `filter_incomplete_user_turns` and `user_turn_completion_config` were only handled in the legacy dict-based `_update_settings` code path. This adds them to `LLMSettings` and introduces `LLMService._update_settings_from_typed` so the typed path handles them too. 
--- src/pipecat/services/llm_service.py | 25 ++++++++++++++++++++++++- src/pipecat/services/settings.py | 16 +++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 77af50f15..3a6e64def 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import ServiceSettings +from pipecat.services.settings import LLMSettings, ServiceSettings, is_given from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationUtil, @@ -311,6 +311,29 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._cancel_sequential_runner_task() await self._cancel_summary_task() + async def _update_settings_from_typed(self, update: LLMSettings) -> set[str]: + """Apply a typed settings update, handling turn-completion fields. + + Args: + update: A typed LLM settings delta. + + Returns: + Set of field names whose values actually changed. 
+ """ + changed = await super()._update_settings_from_typed(update) + + if "filter_incomplete_user_turns" in changed: + self._filter_incomplete_user_turns = self._settings.filter_incomplete_user_turns + logger.info( + f"{self}: Incomplete turn filtering " + f"{'enabled' if self._filter_incomplete_user_turns else 'disabled'}" + ) + + if "user_turn_completion_config" in changed and self._filter_incomplete_user_turns: + self.set_user_turn_completion_config(self._settings.user_turn_completion_config) + + return changed + async def _update_settings(self, settings: Mapping[str, Any]): """Update LLM service settings. diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index fbec5cdf8..718996984 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -31,10 +31,13 @@ from __future__ import annotations import copy from dataclasses import dataclass, field, fields -from typing import Any, ClassVar, Dict, Mapping, Optional, Set, Type, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Set, Type, TypeVar from loguru import logger +if TYPE_CHECKING: + from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig + # --------------------------------------------------------------------------- # NOT_GIVEN sentinel # --------------------------------------------------------------------------- @@ -258,6 +261,13 @@ class LLMSettings(ServiceSettings): frequency_penalty: Frequency penalty. presence_penalty: Presence penalty. seed: Random seed for reproducibility. + filter_incomplete_user_turns: Enable LLM-based turn completion detection + to suppress bot responses when the user was cut off mid-thought. + See ``examples/foundational/22-filter-incomplete-turns.py`` and + ``UserTurnCompletionLLMServiceMixin``. + user_turn_completion_config: Configuration for turn completion behavior + when ``filter_incomplete_user_turns`` is enabled. 
Controls timeouts + and prompts for incomplete turns. """ temperature: Any = field(default_factory=lambda: NOT_GIVEN) @@ -267,6 +277,10 @@ class LLMSettings(ServiceSettings): frequency_penalty: Any = field(default_factory=lambda: NOT_GIVEN) presence_penalty: Any = field(default_factory=lambda: NOT_GIVEN) seed: Any = field(default_factory=lambda: NOT_GIVEN) + filter_incomplete_user_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + user_turn_completion_config: UserTurnCompletionConfig | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) @dataclass From e43351f5f8738fb2b8f7355de749f1e673077683 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 11:29:37 -0500 Subject: [PATCH 003/189] Add class-level `_settings` type annotations to all service classes for better editor support. Standardize all STT, TTS, and LLM service classes to declare `_settings` with the narrowed Settings type as a class-level annotation. This gives editors and type checkers the specific type when hovering or autocompleting on `self._settings` in each service and its subclasses. Inline `self._settings: Type = ...` assignments are replaced with plain `self._settings = ...`. 
--- src/pipecat/services/anthropic/llm.py | 2 ++ src/pipecat/services/assemblyai/stt.py | 4 +++- src/pipecat/services/asyncai/tts.py | 8 ++++++-- src/pipecat/services/aws/llm.py | 2 ++ src/pipecat/services/aws/stt.py | 4 +++- src/pipecat/services/aws/tts.py | 4 +++- src/pipecat/services/azure/stt.py | 4 +++- src/pipecat/services/azure/tts.py | 4 +++- src/pipecat/services/camb/tts.py | 4 +++- src/pipecat/services/cartesia/stt.py | 4 +++- src/pipecat/services/cartesia/tts.py | 8 ++++++-- src/pipecat/services/deepgram/stt.py | 4 +++- src/pipecat/services/deepgram/stt_sagemaker.py | 4 +++- src/pipecat/services/deepgram/tts.py | 8 ++++++-- src/pipecat/services/elevenlabs/stt.py | 8 ++++++-- src/pipecat/services/elevenlabs/tts.py | 8 ++++++-- src/pipecat/services/fal/stt.py | 4 +++- src/pipecat/services/fish/tts.py | 4 +++- src/pipecat/services/gladia/stt.py | 2 ++ src/pipecat/services/google/gemini_live/llm.py | 2 ++ src/pipecat/services/google/llm.py | 2 ++ src/pipecat/services/google/stt.py | 2 ++ src/pipecat/services/google/tts.py | 12 +++++++++--- src/pipecat/services/gradium/stt.py | 4 +++- src/pipecat/services/gradium/tts.py | 4 +++- src/pipecat/services/grok/realtime/llm.py | 2 ++ src/pipecat/services/groq/tts.py | 4 +++- src/pipecat/services/hathora/stt.py | 4 +++- src/pipecat/services/hathora/tts.py | 4 +++- src/pipecat/services/inworld/tts.py | 8 ++++++-- src/pipecat/services/kokoro/tts.py | 4 +++- src/pipecat/services/llm_service.py | 2 ++ src/pipecat/services/lmnt/tts.py | 4 +++- src/pipecat/services/minimax/tts.py | 4 +++- src/pipecat/services/neuphonic/tts.py | 4 +++- src/pipecat/services/nvidia/stt.py | 8 ++++++-- src/pipecat/services/openai/base_llm.py | 2 ++ src/pipecat/services/openai/realtime/llm.py | 2 ++ src/pipecat/services/openai/stt.py | 4 +++- src/pipecat/services/openai/tts.py | 4 +++- src/pipecat/services/openai_realtime_beta/openai.py | 2 ++ src/pipecat/services/playht/tts.py | 8 ++++++-- src/pipecat/services/resembleai/tts.py | 4 +++- 
src/pipecat/services/rime/tts.py | 12 +++++++++--- src/pipecat/services/sarvam/stt.py | 4 +++- src/pipecat/services/sarvam/tts.py | 8 ++++++-- src/pipecat/services/soniox/stt.py | 2 ++ src/pipecat/services/speechmatics/stt.py | 2 ++ src/pipecat/services/stt_service.py | 2 ++ src/pipecat/services/tts_service.py | 2 ++ src/pipecat/services/ultravox/llm.py | 2 ++ src/pipecat/services/whisper/base_stt.py | 4 +++- src/pipecat/services/whisper/stt.py | 8 ++++++-- src/pipecat/services/xtts/tts.py | 4 +++- 54 files changed, 188 insertions(+), 52 deletions(-) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 36ee104f5..25611d0d1 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -129,6 +129,8 @@ class AnthropicLLMService(LLMService): Can use custom clients like AsyncAnthropicBedrock and AsyncAnthropicVertex. """ + _settings: AnthropicLLMSettings + # Overriding the default adapter to use the Anthropic one. adapter_class = AnthropicLLMAdapter diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 278873fdf..910d1e005 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -75,6 +75,8 @@ class AssemblyAISTTService(WebsocketSTTService): for audio processing and connection management. 
""" + _settings: AssemblyAISTTSettings + def __init__( self, *, @@ -111,7 +113,7 @@ class AssemblyAISTTService(WebsocketSTTService): ) self._api_key = api_key - self._settings: AssemblyAISTTSettings = AssemblyAISTTSettings( + self._settings = AssemblyAISTTSettings( language=language, connection_params=connection_params, ) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index aecf69a26..2b0740956 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -95,6 +95,8 @@ class AsyncAITTSService(AudioContextTTSService): Provides text-to-speech using Async's streaming WebSocket API. """ + _settings: AsyncAITTSSettings + class InputParams(BaseModel): """Input parameters for Async TTS configuration. @@ -148,7 +150,7 @@ class AsyncAITTSService(AudioContextTTSService): self._api_key = api_key self._api_version = version self._url = url - self._settings: AsyncAITTSSettings = AsyncAITTSSettings( + self._settings = AsyncAITTSSettings( model=model, voice=voice_id, output_format={ @@ -431,6 +433,8 @@ class AsyncAIHttpTTSService(TTSService): connection is not required or desired. """ + _settings: AsyncAITTSSettings + class InputParams(BaseModel): """Input parameters for Async API. @@ -477,7 +481,7 @@ class AsyncAIHttpTTSService(TTSService): self._api_key = api_key self._base_url = url self._api_version = version - self._settings: AsyncAITTSSettings = AsyncAITTSSettings( + self._settings = AsyncAITTSSettings( model=model, voice=voice_id, output_container=container, diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 032cee060..50de0de2c 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -743,6 +743,8 @@ class AWSBedrockLLMService(LLMService): vision capabilities. """ + _settings: AWSBedrockLLMSettings + # Overriding the default adapter to use the Anthropic one. 
adapter_class = AWSBedrockLLMAdapter diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index cb52da12a..cd8a7103c 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -72,6 +72,8 @@ class AWSTranscribeSTTService(WebsocketSTTService): final transcription results. """ + _settings: AWSTranscribeSTTSettings + def __init__( self, *, @@ -99,7 +101,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): """ super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self._settings: AWSTranscribeSTTSettings = AWSTranscribeSTTSettings( + self._settings = AWSTranscribeSTTSettings( language=language, sample_rate=sample_rate, media_encoding="linear16", diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 5086b1469..b7f6386ca 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -150,6 +150,8 @@ class AWSPollyTTSService(TTSService): options including prosody controls. """ + _settings: AWSPollyTTSSettings + class InputParams(BaseModel): """Input parameters for AWS Polly TTS configuration. @@ -206,7 +208,7 @@ class AWSPollyTTSService(TTSService): } self._aws_session = aioboto3.Session() - self._settings: AWSPollyTTSSettings = AWSPollyTTSSettings( + self._settings = AWSPollyTTSSettings( voice=voice_id, engine=params.engine, language=self.language_to_service_language(params.language) diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index bf3f70653..95840bde9 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -71,6 +71,8 @@ class AzureSTTService(STTService): provides real-time transcription results with timing information. 
""" + _settings: AzureSTTSettings + def __init__( self, *, @@ -107,7 +109,7 @@ class AzureSTTService(STTService): self._audio_stream = None self._speech_recognizer = None - self._settings: AzureSTTSettings = AzureSTTSettings( + self._settings = AzureSTTSettings( region=region, language=language_to_azure_language(language), sample_rate=sample_rate, diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 04b51d10b..ba6d8ac13 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -100,6 +100,8 @@ class AzureBaseTTSService: This is a mixin class and should be used alongside TTSService or its subclasses. """ + _settings: AzureTTSSettings + # Define SSML escape mappings based on SSML reserved characters # See - https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-structure SSML_ESCAPE_CHARS = { @@ -153,7 +155,7 @@ class AzureBaseTTSService: """ params = params or AzureBaseTTSService.InputParams() - self._settings: AzureTTSSettings = AzureTTSSettings( + self._settings = AzureTTSSettings( emphasis=params.emphasis, language=self.language_to_service_language(params.language) if params.language diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 8a6f67231..c484a3c80 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -170,6 +170,8 @@ class CambTTSService(TTSService): ) """ + _settings: CambTTSSettings + class InputParams(BaseModel): """Input parameters for Camb.ai TTS configuration. 
@@ -226,7 +228,7 @@ class CambTTSService(TTSService): ) # Build settings - self._settings: CambTTSSettings = CambTTSSettings( + self._settings = CambTTSSettings( model=model, voice=voice_id, language=( diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 624801bfb..7f684f886 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -149,6 +149,8 @@ class CartesiaSTTService(WebsocketSTTService): See: https://docs.cartesia.ai/api-reference/stt/stt """ + _settings: CartesiaSTTSettings + def __init__( self, *, @@ -194,7 +196,7 @@ class CartesiaSTTService(WebsocketSTTService): k: v for k, v in merged_options.items() if not isinstance(v, str) or v != "None" } - self._settings: CartesiaSTTSettings = CartesiaSTTSettings( + self._settings = CartesiaSTTSettings( model=merged_options["model"], language=merged_options.get("language"), encoding=merged_options.get("encoding", "pcm_s16le"), diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 531aafdf7..117853e36 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -226,6 +226,8 @@ class CartesiaTTSService(AudioContextWordTTSService): customization options including speed and emotion controls. """ + _settings: CartesiaTTSSettings + class InputParams(BaseModel): """Input parameters for Cartesia TTS configuration. @@ -316,7 +318,7 @@ class CartesiaTTSService(AudioContextWordTTSService): self._api_key = api_key self._cartesia_version = cartesia_version self._url = url - self._settings: CartesiaTTSSettings = CartesiaTTSSettings( + self._settings = CartesiaTTSSettings( output_container=container, output_encoding=encoding, output_sample_rate=0, @@ -655,6 +657,8 @@ class CartesiaHttpTTSService(TTSService): integration is preferred. """ + _settings: CartesiaTTSSettings + class InputParams(BaseModel): """Input parameters for Cartesia HTTP TTS configuration. 
@@ -712,7 +716,7 @@ class CartesiaHttpTTSService(TTSService): self._api_key = api_key self._base_url = base_url self._cartesia_version = cartesia_version - self._settings: CartesiaTTSSettings = CartesiaTTSSettings( + self._settings = CartesiaTTSSettings( model=model, voice=voice_id, output_container=container, diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 91d4308cb..2beaec80c 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -65,6 +65,8 @@ class DeepgramSTTService(STTService): Supports configurable models, languages, and various audio processing options. """ + _settings: DeepgramSTTSettings + def __init__( self, *, @@ -143,7 +145,7 @@ class DeepgramSTTService(STTService): self.set_model_name(merged_options["model"]) merged_live_options = LiveOptions(**merged_options) - self._settings: DeepgramSTTSettings = DeepgramSTTSettings( + self._settings = DeepgramSTTSettings( model=merged_options.get("model"), language=merged_options.get("language"), live_options=merged_live_options, diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 95242ade6..68ec9651b 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -88,6 +88,8 @@ class DeepgramSageMakerSTTService(STTService): ) """ + _settings: DeepgramSageMakerSTTSettings + def __init__( self, *, @@ -143,7 +145,7 @@ class DeepgramSageMakerSTTService(STTService): self.set_model_name(merged_options["model"]) merged_live_options = LiveOptions(**merged_options) - self._settings: DeepgramSageMakerSTTSettings = DeepgramSageMakerSTTSettings( + self._settings = DeepgramSageMakerSTTSettings( model=merged_options.get("model"), language=merged_options.get("language"), live_options=merged_live_options, diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 4c698dcea..7ae0fb9ac 100644 --- 
a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -64,6 +64,8 @@ class DeepgramTTSService(WebsocketTTSService): message for conversational AI use cases. """ + _settings: DeepgramTTSSettings + SUPPORTED_ENCODINGS = ("linear16", "mulaw", "alaw") def __init__( @@ -104,7 +106,7 @@ class DeepgramTTSService(WebsocketTTSService): self._api_key = api_key self._base_url = base_url - self._settings: DeepgramTTSSettings = DeepgramTTSSettings( + self._settings = DeepgramTTSSettings( model=voice, voice=voice, encoding=encoding, @@ -345,6 +347,8 @@ class DeepgramHttpTTSService(TTSService): configurable sample rates and quality settings. """ + _settings: DeepgramTTSSettings + def __init__( self, *, @@ -372,7 +376,7 @@ class DeepgramHttpTTSService(TTSService): self._api_key = api_key self._session = aiohttp_session self._base_url = base_url - self._settings: DeepgramTTSSettings = DeepgramTTSSettings( + self._settings = DeepgramTTSSettings( model=voice, voice=voice, encoding=encoding, diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 950dc5de9..b33fee710 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -215,6 +215,8 @@ class ElevenLabsSTTService(SegmentedSTTService): The service uploads audio files to ElevenLabs and receives transcription results directly. """ + _settings: ElevenLabsSTTSettings + class InputParams(BaseModel): """Configuration parameters for ElevenLabs STT API. @@ -264,7 +266,7 @@ class ElevenLabsSTTService(SegmentedSTTService): self._session = aiohttp_session self._model_id = model - self._settings: ElevenLabsSTTSettings = ElevenLabsSTTSettings( + self._settings = ElevenLabsSTTSettings( model=model, language=self.language_to_service_language(params.language) if params.language @@ -449,6 +451,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): commit transcript segments, providing consistency with other STT services. 
""" + _settings: ElevenLabsRealtimeSTTSettings + class InputParams(BaseModel): """Configuration parameters for ElevenLabs Realtime STT API. @@ -517,7 +521,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): self._audio_format = "" # initialized in start() self._receive_task = None - self._settings: ElevenLabsRealtimeSTTSettings = ElevenLabsRealtimeSTTSettings( + self._settings = ElevenLabsRealtimeSTTSettings( model=model, language=params.language_code, commit_strategy=params.commit_strategy, diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index b061383f3..cd3a99e1e 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -321,6 +321,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService): customization options including stability, similarity boost, and speed controls. """ + _settings: ElevenLabsTTSSettings + class InputParams(BaseModel): """Input parameters for ElevenLabs TTS configuration. @@ -401,7 +403,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): self._api_key = api_key self._url = url - self._settings: ElevenLabsTTSSettings = ElevenLabsTTSSettings( + self._settings = ElevenLabsTTSSettings( model=model, voice=voice_id, language=( @@ -836,6 +838,8 @@ class ElevenLabsHttpTTSService(WordTTSService): connection is not required or desired. """ + _settings: ElevenLabsHttpTTSSettings + class InputParams(BaseModel): """Input parameters for ElevenLabs HTTP TTS configuration. 
@@ -902,7 +906,7 @@ class ElevenLabsHttpTTSService(WordTTSService): self._params = params self._session = aiohttp_session - self._settings: ElevenLabsHttpTTSSettings = ElevenLabsHttpTTSSettings( + self._settings = ElevenLabsHttpTTSSettings( model=model, voice=voice_id, language=self.language_to_service_language(params.language) diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index eef0e0487..a459d15dd 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -171,6 +171,8 @@ class FalSTTService(SegmentedSTTService): segments. It inherits from SegmentedSTTService to handle audio buffering and speech detection. """ + _settings: FalSTTSettings + class InputParams(BaseModel): """Configuration parameters for Fal's Wizper API. @@ -221,7 +223,7 @@ class FalSTTService(SegmentedSTTService): ) self._fal_client = fal_client.AsyncClient(key=api_key or os.getenv("FAL_KEY")) - self._settings: FalSTTSettings = FalSTTSettings( + self._settings = FalSTTSettings( language=self.language_to_service_language(params.language) if params.language else "en", diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 5517758ad..5c56d0c91 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -78,6 +78,8 @@ class FishAudioTTSService(InterruptibleTTSService): audio generation with interruption handling. """ + _settings: FishAudioTTSSettings + class InputParams(BaseModel): """Input parameters for Fish Audio TTS configuration. 
@@ -161,7 +163,7 @@ class FishAudioTTSService(InterruptibleTTSService): self._receive_task = None self._request_id = None - self._settings: FishAudioTTSSettings = FishAudioTTSSettings( + self._settings = FishAudioTTSSettings( voice=reference_id, fish_sample_rate=0, latency=params.latency, diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 76a1620e1..d56150a29 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -204,6 +204,8 @@ class GladiaSTTService(WebsocketSTTService): Use :class:`~pipecat.services.gladia.config.GladiaInputParams` directly instead. """ + _settings: GladiaSTTSettings + # Maintain backward compatibility InputParams = _InputParamsDescriptor() diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 1edab5783..2e8a2efbd 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -635,6 +635,8 @@ class GeminiLiveLLMService(LLMService): responses, and tool usage. """ + _settings: GeminiLiveLLMSettings + # Overriding the default adapter to use the Gemini one. adapter_class = GeminiLLMAdapter diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 692106241..7f6dd724c 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -692,6 +692,8 @@ class GoogleLLMService(LLMService): expected by the Google AI model. """ + _settings: GoogleLLMSettings + # Overriding the default adapter to use the Gemini one. adapter_class = GeminiLLMAdapter diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 8f762da9d..585b766f4 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -412,6 +412,8 @@ class GoogleSTTService(STTService): ValueError: If project ID is not found in credentials. 
""" + _settings: GoogleSTTSettings + # Google Cloud's STT service has a connection time limit of 5 minutes per stream. # They've shared an "endless streaming" example that guided this implementation: # https://cloud.google.com/speech-to-text/docs/transcribe-streaming-audio#endless-streaming diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index d015571d0..46ec96f3e 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -547,6 +547,8 @@ class GoogleHttpTTSService(TTSService): Chirp and Journey voices don't support SSML and will use plain text input. """ + _settings: GoogleHttpTTSSettings + class InputParams(BaseModel): """Input parameters for Google HTTP TTS voice customization. @@ -597,7 +599,7 @@ class GoogleHttpTTSService(TTSService): params = params or GoogleHttpTTSService.InputParams() self._location = location - self._settings: GoogleHttpTTSSettings = GoogleHttpTTSSettings( + self._settings = GoogleHttpTTSSettings( pitch=params.pitch, rate=params.rate, speaking_rate=params.speaking_rate, @@ -968,6 +970,8 @@ class GoogleTTSService(GoogleBaseTTSService): ) """ + _settings: GoogleStreamTTSSettings + class InputParams(BaseModel): """Input parameters for Google streaming TTS configuration. 
@@ -1008,7 +1012,7 @@ class GoogleTTSService(GoogleBaseTTSService): params = params or GoogleTTSService.InputParams() self._location = location - self._settings: GoogleStreamTTSSettings = GoogleStreamTTSSettings( + self._settings = GoogleStreamTTSSettings( language=self.language_to_service_language(params.language) if params.language else "en-US", @@ -1109,6 +1113,8 @@ class GeminiTTSService(GoogleBaseTTSService): ) """ + _settings: GeminiTTSSettings + GOOGLE_SAMPLE_RATE = 24000 # Google TTS always outputs at 24kHz # List of available Gemini TTS voices @@ -1216,7 +1222,7 @@ class GeminiTTSService(GoogleBaseTTSService): self._location = location self._model = model self._voice_id = voice_id - self._settings: GeminiTTSSettings = GeminiTTSSettings( + self._settings = GeminiTTSSettings( language=self.language_to_service_language(params.language) if params.language else "en-US", diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 2bad8cf30..ff2002625 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -86,6 +86,8 @@ class GradiumSTTService(WebsocketSTTService): for audio processing and connection management. """ + _settings: GradiumSTTSettings + class InputParams(BaseModel): """Configuration parameters for Gradium STT API. 
@@ -145,7 +147,7 @@ class GradiumSTTService(WebsocketSTTService): params = params or GradiumSTTService.InputParams() - self._settings: GradiumSTTSettings = GradiumSTTSettings( + self._settings = GradiumSTTSettings( language=params.language, delay_in_frames=params.delay_in_frames if params.delay_in_frames else NOT_GIVEN, ) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index e129fba68..5dc355b91 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -53,6 +53,8 @@ class GradiumTTSSettings(TTSSettings): class GradiumTTSService(InterruptibleWordTTSService): """Text-to-Speech service using Gradium's websocket API.""" + _settings: GradiumTTSSettings + class InputParams(BaseModel): """Configuration parameters for Gradium TTS service. @@ -99,7 +101,7 @@ class GradiumTTSService(InterruptibleWordTTSService): self._url = url self._voice_id = voice_id self._json_config = json_config - self._settings: GradiumTTSSettings = GradiumTTSSettings( + self._settings = GradiumTTSSettings( model=model, voice=voice_id, output_format="pcm", diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index 7cb619a7d..dfe039bbc 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -113,6 +113,8 @@ class GrokRealtimeLLMService(LLMService): - Server-side VAD (Voice Activity Detection) """ + _settings: GrokRealtimeLLMSettings + # Use the Grok-specific adapter adapter_class = GrokRealtimeLLMAdapter diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 678a2426d..8e551725f 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -57,6 +57,8 @@ class GroqTTSService(TTSService): and output formats. """ + _settings: GroqTTSSettings + class InputParams(BaseModel): """Input parameters for Groq TTS configuration. 
@@ -109,7 +111,7 @@ class GroqTTSService(TTSService): self._voice_id = voice_id self._params = params - self._settings: GroqTTSSettings = GroqTTSSettings( + self._settings = GroqTTSSettings( model=model_name, voice=voice_id, language=str(params.language) if params.language else "en", diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index b0e3beead..8af53958d 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -48,6 +48,8 @@ class HathoraSTTService(SegmentedSTTService): [Documentation](https://models.hathora.dev) """ + _settings: HathoraSTTSettings + class InputParams(BaseModel): """Optional input parameters for Hathora STT configuration. @@ -98,7 +100,7 @@ class HathoraSTTService(SegmentedSTTService): params = params or HathoraSTTService.InputParams() - self._settings: HathoraSTTSettings = HathoraSTTSettings( + self._settings = HathoraSTTSettings( model=model, language=params.language, config=params.config, diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index b821b1e05..2b3a5ddb1 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -68,6 +68,8 @@ class HathoraTTSService(TTSService): [Documentation](https://models.hathora.dev) """ + _settings: HathoraTTSSettings + class InputParams(BaseModel): """Optional input parameters for Hathora TTS configuration. 
@@ -115,7 +117,7 @@ class HathoraTTSService(TTSService): params = params or HathoraTTSService.InputParams() - self._settings: HathoraTTSSettings = HathoraTTSSettings( + self._settings = HathoraTTSSettings( model=model, voice=voice_id, speed=params.speed, diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 68c140187..7f4374b93 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -82,6 +82,8 @@ class InworldHttpTTSService(WordTTSService): Outputs LINEAR16 audio at configurable sample rates with word-level timestamps. """ + _settings: InworldTTSSettings + class InputParams(BaseModel): """Input parameters for Inworld TTS configuration. @@ -138,7 +140,7 @@ class InworldHttpTTSService(WordTTSService): else: self._base_url = "https://api.inworld.ai/tts/v1/voice" - self._settings: InworldTTSSettings = InworldTTSSettings( + self._settings = InworldTTSSettings( model=model, voice=voice_id, audio_encoding=encoding, @@ -438,6 +440,8 @@ class InworldTTSService(AudioContextWordTTSService): with word-level timestamps. """ + _settings: InworldTTSSettings + class InputParams(BaseModel): """Input parameters for Inworld WebSocket TTS configuration. @@ -503,7 +507,7 @@ class InworldTTSService(AudioContextWordTTSService): self._api_key = api_key self._url = url - self._settings: InworldTTSSettings = InworldTTSSettings( + self._settings = InworldTTSSettings( model=model, voice=voice_id, audio_encoding=encoding, diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 242446de9..6d46441f6 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -107,6 +107,8 @@ class KokoroTTSService(TTSService): Automatically downloads model files on first use. """ + _settings: KokoroTTSSettings + class InputParams(BaseModel): """Input parameters for Kokoro TTS configuration. 
@@ -142,7 +144,7 @@ class KokoroTTSService(TTSService): self._voice_id = voice_id self._lang_code = language_to_kokoro_language(params.language) - self._settings: KokoroTTSSettings = KokoroTTSSettings( + self._settings = KokoroTTSSettings( voice=voice_id, language=language_to_kokoro_language(params.language), lang_code=language_to_kokoro_language(params.language), diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 3a6e64def..693448cd5 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -174,6 +174,8 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): logger.info(f"Starting {len(function_calls)} function calls") """ + _settings: LLMSettings + # OpenAILLMAdapter is used as the default adapter since it aligns with most LLM implementations. # However, subclasses should override this with a more specific adapter when necessary. adapter_class: Type[BaseLLMAdapter] = OpenAILLMAdapter diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 97569fa1d..9fa727c5f 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -92,6 +92,8 @@ class LmntTTSService(InterruptibleTTSService): language settings. 
""" + _settings: LmntTTSSettings + def __init__( self, *, @@ -122,7 +124,7 @@ class LmntTTSService(InterruptibleTTSService): self._api_key = api_key self._voice_id = voice_id self.set_model_name(model) - self._settings: LmntTTSSettings = LmntTTSSettings( + self._settings = LmntTTSSettings( model=model, voice=voice_id, language=self.language_to_service_language(language), diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 6ce3e4b45..db236d6e4 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -132,6 +132,8 @@ class MiniMaxHttpTTSService(TTSService): https://www.minimax.io/platform/document/T2A%20V2?key=66719005a427f0c8a5701643 """ + _settings: MiniMaxTTSSettings + class InputParams(BaseModel): """Configuration parameters for MiniMax TTS. @@ -208,7 +210,7 @@ class MiniMaxHttpTTSService(TTSService): self._voice_id = voice_id # Create voice settings - self._settings: MiniMaxTTSSettings = MiniMaxTTSSettings( + self._settings = MiniMaxTTSSettings( model=model, voice=voice_id, stream=True, diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index b7019e6d6..de92c48a2 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -99,6 +99,8 @@ class NeuphonicTTSService(InterruptibleTTSService): parameters for high-quality speech generation. """ + _settings: NeuphonicTTSSettings + class InputParams(BaseModel): """Input parameters for Neuphonic TTS configuration. 
@@ -146,7 +148,7 @@ class NeuphonicTTSService(InterruptibleTTSService): self._api_key = api_key self._url = url - self._settings: NeuphonicTTSSettings = NeuphonicTTSSettings( + self._settings = NeuphonicTTSSettings( lang_code=self.language_to_service_language(params.language), speed=params.speed, encoding=encoding, diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index c65d6da62..3a36e062e 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -125,6 +125,8 @@ class NvidiaSTTService(STTService): processing for low-latency applications. """ + _settings: NvidiaSTTSettings + class InputParams(BaseModel): """Configuration parameters for NVIDIA Riva STT service. @@ -178,7 +180,7 @@ class NvidiaSTTService(STTService): self._custom_configuration = "" self._function_id = model_function_map.get("function_id") - self._settings: NvidiaSTTSettings = NvidiaSTTSettings( + self._settings = NvidiaSTTSettings( language=params.language, ) @@ -399,6 +401,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): audio buffering and speech detection. """ + _settings: NvidiaSegmentedSTTSettings + class InputParams(BaseModel): """Configuration parameters for NVIDIA Riva segmented STT service. @@ -470,7 +474,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = None self._asr_service = None - self._settings: NvidiaSegmentedSTTSettings = NvidiaSegmentedSTTSettings( + self._settings = NvidiaSegmentedSTTSettings( language=params.language or Language.EN_US, profanity_filter=params.profanity_filter, automatic_punctuation=params.automatic_punctuation, diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 2ac53794c..454087508 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -70,6 +70,8 @@ class BaseOpenAILLMService(LLMService): configurations. 
""" + _settings: OpenAILLMSettings + class InputParams(BaseModel): """Input parameters for OpenAI model configuration. diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index abd66963b..825639950 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -110,6 +110,8 @@ class OpenAIRealtimeLLMService(LLMService): management, and real-time transcription. """ + _settings: OpenAIRealtimeLLMSettings + # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one. adapter_class = OpenAIRealtimeLLMAdapter diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 12eada24e..266b2964b 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -169,6 +169,8 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): ) """ + _settings: OpenAIRealtimeSTTSettings + def __init__( self, *, @@ -231,7 +233,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self._noise_reduction = noise_reduction self._should_interrupt = should_interrupt - self._settings: OpenAIRealtimeSTTSettings = OpenAIRealtimeSTTSettings( + self._settings = OpenAIRealtimeSTTSettings( model=model, language=language, prompt=prompt, diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index ee1e34316..3572d287e 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -83,6 +83,8 @@ class OpenAITTSService(TTSService): speech synthesis with streaming audio output. 
""" + _settings: OpenAITTSSettings + OPENAI_SAMPLE_RATE = 24000 # OpenAI TTS always outputs at 24kHz class InputParams(BaseModel): @@ -147,7 +149,7 @@ class OpenAITTSService(TTSService): stacklevel=2, ) - self._settings: OpenAITTSSettings = OpenAITTSSettings( + self._settings = OpenAITTSSettings( model=model, voice=voice, instructions=params.instructions if params else instructions, diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index d37b1434e..b7703ad98 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -115,6 +115,8 @@ class OpenAIRealtimeBetaLLMService(LLMService): management, and real-time transcription. """ + _settings: OpenAIRealtimeBetaLLMSettings + # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one. adapter_class = OpenAIRealtimeLLMAdapter diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index f79f54560..7d61ffd87 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -131,6 +131,8 @@ class PlayHTTTSService(InterruptibleTTSService): language settings. """ + _settings: PlayHTTTSSettings + class InputParams(BaseModel): """Input parameters for PlayHT TTS configuration. @@ -191,7 +193,7 @@ class PlayHTTTSService(InterruptibleTTSService): self._receive_task = None self._context_id = None - self._settings: PlayHTTTSSettings = PlayHTTTSSettings( + self._settings = PlayHTTTSSettings( model=voice_engine, voice=voice_url, language=self.language_to_service_language(params.language) @@ -444,6 +446,8 @@ class PlayHTHttpTTSService(TTSService): required and simpler integration is preferred. """ + _settings: PlayHTTTSSettings + class InputParams(BaseModel): """Input parameters for PlayHT HTTP TTS configuration. 
@@ -522,7 +526,7 @@ class PlayHTHttpTTSService(TTSService): # Extract the base engine name voice_engine = voice_engine.replace("-ws", "") - self._settings: PlayHTTTSSettings = PlayHTTTSSettings( + self._settings = PlayHTTTSSettings( voice=voice_url, language=self.language_to_service_language(params.language) if params.language diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 08f9b81bd..cd3da1767 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -63,6 +63,8 @@ class ResembleAITTSService(AudioContextWordTTSService): multiple simultaneous synthesis requests with proper interruption support. """ + _settings: ResembleAITTSSettings + def __init__( self, *, @@ -93,7 +95,7 @@ class ResembleAITTSService(AudioContextWordTTSService): self._api_key = api_key self._voice_id = voice_id self._url = url - self._settings: ResembleAITTSSettings = ResembleAITTSSettings( + self._settings = ResembleAITTSSettings( voice=voice_id, precision=precision, output_format=output_format, diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 5a3ed67a2..4af9fe63b 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -134,6 +134,8 @@ class RimeTTSService(AudioContextWordTTSService): within a turn. """ + _settings: RimeTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime TTS service. @@ -207,7 +209,7 @@ class RimeTTSService(AudioContextWordTTSService): self._url = url self._voice_id = voice_id self._model = model - self._settings: RimeTTSSettings = RimeTTSSettings( + self._settings = RimeTTSSettings( speaker=voice_id, modelId=model, audioFormat="pcm", @@ -537,6 +539,8 @@ class RimeHttpTTSService(TTSService): Suitable for use cases where streaming is not required. """ + _settings: RimeTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime HTTP TTS service. 
@@ -585,7 +589,7 @@ class RimeHttpTTSService(TTSService): self._api_key = api_key self._session = aiohttp_session self._base_url = "https://users.rime.ai/v1/rime-tts" - self._settings: RimeTTSSettings = RimeTTSSettings( + self._settings = RimeTTSSettings( lang=self.language_to_service_language(params.language) if params.language else "eng", speedAlpha=params.speed_alpha, reduceLatency=params.reduce_latency, @@ -706,6 +710,8 @@ class RimeNonJsonTTSService(InterruptibleTTSService): future. This service focuses on the current plain text protocol. """ + _settings: RimeNonJsonTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime Non-JSON WebSocket TTS service. @@ -763,7 +769,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): self._url = url self._voice_id = voice_id self._model = model - self._settings: RimeNonJsonTTSSettings = RimeNonJsonTTSSettings( + self._settings = RimeNonJsonTTSSettings( speaker=voice_id, modelId=model, audioFormat=audio_format, diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index e2bc6a08f..834171b32 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -154,6 +154,8 @@ class SarvamSTTService(STTService): Provides real-time speech recognition using Sarvam's WebSocket API. """ + _settings: SarvamSTTSettings + class InputParams(BaseModel): """Configuration parameters for Sarvam STT service. 
@@ -247,7 +249,7 @@ class SarvamSTTService(STTService): # Resolve mode default from model config mode = params.mode if params.mode is not None else self._config.default_mode - self._settings: SarvamSTTSettings = SarvamSTTSettings( + self._settings = SarvamSTTSettings( model=model, language=params.language, prompt=params.prompt if params.prompt is not None else NOT_GIVEN, diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index e28914b4c..ec51a85d5 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -371,6 +371,8 @@ class SarvamHttpTTSService(TTSService): ) """ + _settings: SarvamHttpTTSSettings + class InputParams(BaseModel): """Input parameters for Sarvam TTS configuration. @@ -478,7 +480,7 @@ class SarvamHttpTTSService(TTSService): pace = max(pace_min, min(pace_max, pace)) # Build base settings - self._settings: SarvamHttpTTSSettings = SarvamHttpTTSSettings( + self._settings = SarvamHttpTTSSettings( language=( self.language_to_service_language(params.language) if params.language else "en-IN" ), @@ -684,6 +686,8 @@ class SarvamTTSService(InterruptibleTTSService): See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for API details. """ + _settings: SarvamWSTTSSettings + class InputParams(BaseModel): """Configuration parameters for Sarvam TTS WebSocket service. 
@@ -837,7 +841,7 @@ class SarvamTTSService(InterruptibleTTSService): pace = max(pace_min, min(pace_max, pace)) # Build base settings - self._settings: SarvamWSTTSSettings = SarvamWSTTSSettings( + self._settings = SarvamWSTTSSettings( target_language_code=( self.language_to_service_language(params.language) if params.language else "en-IN" ), diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 9d732a356..c3d9638c1 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -157,6 +157,8 @@ class SonioxSTTService(WebsocketSTTService): For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api """ + _settings: SonioxSTTSettings + def __init__( self, *, diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index d04bb564d..462c2fc6f 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -166,6 +166,8 @@ class SpeechmaticsSTTService(STTService): and speaker diarization. 
""" + _settings: SpeechmaticsSTTSettings + # Export related classes as class attributes TurnDetectionMode = TurnDetectionMode AudioEncoding = AudioEncoding diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index d4e5f4cb5..22cd4f03d 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -70,6 +70,8 @@ class STTService(AIService): logger.error(f"STT connection error: {error}") """ + _settings: STTSettings + def __init__( self, *, diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 4196e7872..49bb29970 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -104,6 +104,8 @@ class TTSService(AIService): logger.debug(f"TTS request: {context_id} - {text}") """ + _settings: TTSSettings + def __init__( self, *, diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 9f0658486..6f5e5d2ee 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -158,6 +158,8 @@ class UltravoxRealtimeLLMService(LLMService): by the model and may not always align with its understanding of user input. """ + _settings: UltravoxRealtimeLLMSettings + def __init__( self, *, diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 2a02c6ce7..6c35824a4 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -124,6 +124,8 @@ class BaseWhisperSTTService(SegmentedSTTService): including metrics generation and error handling. 
""" + _settings: BaseWhisperSTTSettings + def __init__( self, *, @@ -161,7 +163,7 @@ class BaseWhisperSTTService(SegmentedSTTService): self._temperature = temperature self._include_prob_metrics = include_prob_metrics - self._settings: BaseWhisperSTTSettings = BaseWhisperSTTSettings( + self._settings = BaseWhisperSTTSettings( model=model, language=self._language, base_url=base_url, diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index 30451e6d0..d5f4c3f1b 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -211,6 +211,8 @@ class WhisperSTTService(SegmentedSTTService): segments. It supports multiple languages and various model sizes. """ + _settings: WhisperSTTSettings + def __init__( self, *, @@ -238,7 +240,7 @@ class WhisperSTTService(SegmentedSTTService): self._no_speech_prob = no_speech_prob self._model: Optional[WhisperModel] = None - self._settings: WhisperSTTSettings = WhisperSTTSettings( + self._settings = WhisperSTTSettings( model=model if isinstance(model, str) else model.value, language=language, device=self._device, @@ -346,6 +348,8 @@ class WhisperSTTServiceMLX(WhisperSTTService): segments. It's optimized for Apple Silicon and supports multiple languages and quantizations. """ + _settings: WhisperMLXSTTSettings + def __init__( self, *, @@ -371,7 +375,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): self._no_speech_prob = no_speech_prob self._temperature = temperature - self._settings: WhisperMLXSTTSettings = WhisperMLXSTTSettings( + self._settings = WhisperMLXSTTSettings( model=model if isinstance(model, str) else model.value, language=language, no_speech_prob=self._no_speech_prob, diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 664d3d4be..4415f9f53 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -89,6 +89,8 @@ class XTTSService(TTSService): studio speakers configuration. 
""" + _settings: XTTSTTSSettings + def __init__( self, *, @@ -111,7 +113,7 @@ class XTTSService(TTSService): """ super().__init__(sample_rate=sample_rate, **kwargs) - self._settings: XTTSTTSSettings = XTTSTTSSettings( + self._settings = XTTSTTSSettings( voice=voice_id, language=self.language_to_service_language(language), base_url=base_url, From e37f2f99c473350de24f2ec3e52f98132fbc2c19 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 11:39:34 -0500 Subject: [PATCH 004/189] Deprecate `set_model`, `set_voice`, and `set_language` in favor of `*UpdateSettingsFrame`. --- src/pipecat/services/stt_service.py | 25 +++++++++++++++++++------ src/pipecat/services/tts_service.py | 29 +++++++++++++++++++---------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 22cd4f03d..3ce143d92 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -9,6 +9,7 @@ import asyncio import io import time +import warnings import wave from abc import abstractmethod from typing import Any, AsyncGenerator, Dict, Mapping, Optional @@ -168,13 +169,19 @@ class STTService(AIService): async def set_model(self, model: str): """Set the speech recognition model. - When the service has been migrated to typed settings this routes - through :meth:`_update_settings_from_typed` so that concrete - services can react (e.g. reconnect) in a single place. + .. deprecated:: 0.0.103 + Use ``STTUpdateSettingsFrame(model=...)`` instead. Args: model: The name of the model to use for speech recognition. 
""" + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated, use 'STTUpdateSettingsFrame(model=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) logger.info(f"Switching STT model to: [{model}]") if isinstance(self._settings, ServiceSettings): settings_cls = type(self._settings) @@ -185,13 +192,19 @@ class STTService(AIService): async def set_language(self, language: Language): """Set the language for speech recognition. - When the service has been migrated to typed settings this routes - through :meth:`_update_settings_from_typed` so that concrete - services can react (e.g. reconnect) in a single place. + .. deprecated:: 0.0.103 + Use ``STTUpdateSettingsFrame(language=...)`` instead. Args: language: The language to use for speech recognition. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_language' is deprecated, use 'STTUpdateSettingsFrame(language=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) logger.info(f"Switching STT language to: [{language}]") if isinstance(self._settings, ServiceSettings): settings_cls = type(self._settings) diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 49bb29970..b16ebdb24 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -8,6 +8,7 @@ import asyncio import uuid +import warnings from abc import abstractmethod from dataclasses import dataclass from typing import ( @@ -265,13 +266,19 @@ class TTSService(AIService): async def set_model(self, model: str): """Set the TTS model to use. - When the service has been migrated to typed settings this routes - through :meth:`_update_settings_from_typed` so that concrete - services can react (e.g. reconnect) in a single place. + .. deprecated:: 0.0.103 + Use ``TTSUpdateSettingsFrame(model=...)`` instead. Args: model: The name of the TTS model. 
""" + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated, use 'TTSUpdateSettingsFrame(model=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) logger.info(f"Switching TTS model to: [{model}]") if isinstance(self._settings, ServiceSettings): settings_cls = type(self._settings) @@ -282,17 +289,19 @@ class TTSService(AIService): async def set_voice(self, voice: str): """Set the voice for speech synthesis. - When the service has been migrated to typed settings this routes - through :meth:`_update_settings_from_typed` so that concrete - services can react (e.g. reconnect) in a single place. - - .. versionchanged:: 0.0.103 - Now ``async``. In ``__init__`` methods, set - ``self._voice_id`` directly instead of calling this method. + .. deprecated:: 0.0.103 + Use ``TTSUpdateSettingsFrame(voice=...)`` instead. Args: voice: The voice identifier or name. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_voice' is deprecated, use 'TTSUpdateSettingsFrame(voice=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) logger.info(f"Switching TTS voice to: [{voice}]") if isinstance(self._settings, ServiceSettings): settings_cls = type(self._settings) From ab92a0e1d7ca7b1a8b1dca558218a1054a71489c Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 12:09:31 -0500 Subject: [PATCH 005/189] Remove/deprecate service-specific `set_model` and `set_voice` overrides. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - NvidiaSTTService.set_model: convert to proper DeprecationWarning (model can't change at runtime for Riva streaming STT) - NvidiaTTSService.set_model: same treatment for Riva TTS - NvidiaSegmentedSTTService.set_model: remove — base class now routes through _update_settings_from_typed which re-creates the recognition config - GeminiTTSService.set_voice: remove — move AVAILABLE_VOICES validation into _update_settings_from_typed so it fires on both legacy and new paths --- src/pipecat/services/google/tts.py | 20 +++++-------- src/pipecat/services/nvidia/stt.py | 47 ++++++++++++++---------------- src/pipecat/services/nvidia/tts.py | 30 ++++++++++++++----- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 46ec96f3e..2b9ada224 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -1246,16 +1246,6 @@ class GeminiTTSService(GoogleBaseTTSService): """ return language_to_gemini_tts_language(language) - async def set_voice(self, voice_id: str): - """Set the voice for TTS generation. - - Args: - voice_id: Name of the voice to use from AVAILABLE_VOICES. - """ - if voice_id not in self.AVAILABLE_VOICES: - logger.warning(f"Voice '{voice_id}' not in known voices list. Using anyway.") - self._voice_id = voice_id - async def start(self, frame: StartFrame): """Start the Gemini TTS service. @@ -1270,11 +1260,17 @@ class GeminiTTSService(GoogleBaseTTSService): ) async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Override to handle prompt updates. + """Apply a typed settings update with voice validation. Args: - update: Typed settings delta. Can include 'prompt' (str). + update: Typed settings delta. Can include 'voice', 'prompt', etc. + + Returns: + Set of field names whose values actually changed. 
""" + if is_given(update.voice) and update.voice not in self.AVAILABLE_VOICES: + logger.warning(f"Voice '{update.voice}' not in known voices list. Using anyway.") + return await super()._update_settings_from_typed(update) @traced_tts diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 3a36e062e..6190e169d 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -241,18 +241,31 @@ class NvidiaSTTService(STTService): async def set_model(self, model: str): """Set the ASR model for transcription. + .. deprecated:: 0.0.103 + Model cannot be changed after initialization for NVIDIA Riva streaming STT. + Set model and function id in the constructor instead, e.g.:: + + NvidiaSTTService( + api_key=..., + model_function_map={"function_id": "", "model_name": ""}, + ) + Args: model: Model name to set. - - Note: - Model cannot be changed after initialization. Use model_function_map - parameter in constructor instead. """ - logger.warning(f"Cannot set model after initialization. Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated. Model cannot be changed after initialization" + " for NVIDIA Riva streaming STT. Set model and function id in the" + " constructor instead, e.g.:" + " NvidiaSTTService(api_key=..., model_function_map=" + "{'function_id': '', 'model_name': ''})", + DeprecationWarning, + stacklevel=2, + ) async def start(self, frame: StartFrame): """Start the NVIDIA Riva STT service and initialize streaming configuration. @@ -555,22 +568,6 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): """ return True - async def set_model(self, model: str): - """Set the ASR model for transcription. - - Args: - model: Model name to set. 
- - Note: - Model cannot be changed after initialization. Use model_function_map - parameter in constructor instead. - """ - logger.warning(f"Cannot set model after initialization. Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) - async def start(self, frame: StartFrame): """Initialize the service when the pipeline starts. diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 8a018d6aa..27ace15fb 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -106,18 +106,32 @@ class NvidiaTTSService(TTSService): self._config = None async def set_model(self, model: str): - """Attempt to set the TTS model. + """Set the TTS model. - Note: Model cannot be changed after initialization for Riva service. + .. deprecated:: 0.0.103 + Model cannot be changed after initialization for NVIDIA Riva TTS. + Set model and function id in the constructor instead, e.g.:: + + NvidiaTTSService( + api_key=..., + model_function_map={"function_id": "", "model_name": ""}, + ) Args: - model: The model name to set (operation not supported). + model: The model name to set. """ - logger.warning(f"Cannot set model after initialization. Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated. Model cannot be changed after initialization" + " for NVIDIA Riva TTS. 
Set model and function id in the constructor" + " instead, e.g.: NvidiaTTSService(api_key=..., model_function_map=" + "{'function_id': '', 'model_name': ''})", + DeprecationWarning, + stacklevel=2, + ) def _initialize_client(self): if self._service is not None: From b08548af9dc65227df7c49b26c46b43509e5bb13 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 14:30:49 -0500 Subject: [PATCH 006/189] Remove typed-settings migration scaffolding and rename `_update_settings_from_typed` to `_update_settings`. Now that all services use typed `ServiceSettings` objects, this removes the interim scaffolding that supported both dict-based and typed settings paths in parallel. Specifically: removes old dict-based `_update_settings(settings: Mapping)` methods from base classes, removes `isinstance(self._settings, ServiceSettings)` guards, simplifies `process_frame` branching, and renames `_update_settings_from_typed` to `_update_settings` across all ~30 service implementations. Also renames the no-arg `_update_settings()` helper on realtime services to `_send_session_update()` to avoid collision, adds `from_mapping` overrides on `GoogleLLMSettings` and `AnthropicLLMSettings` for ThinkingConfig dict-to-object conversion, and replaces a broken no-arg `_update_settings()` call in Gemini Live with a TODO. 
--- src/pipecat/frames/frames.py | 8 +-- src/pipecat/services/ai_service.py | 69 +++---------------- src/pipecat/services/anthropic/llm.py | 16 ++++- src/pipecat/services/assemblyai/stt.py | 8 +-- src/pipecat/services/asyncai/tts.py | 2 +- src/pipecat/services/aws/llm.py | 2 +- src/pipecat/services/aws/stt.py | 8 +-- src/pipecat/services/aws/tts.py | 2 +- src/pipecat/services/azure/stt.py | 8 +-- src/pipecat/services/azure/tts.py | 2 +- src/pipecat/services/camb/tts.py | 2 +- src/pipecat/services/cartesia/stt.py | 8 +-- src/pipecat/services/cartesia/tts.py | 2 +- src/pipecat/services/deepgram/stt.py | 8 +-- .../services/deepgram/stt_sagemaker.py | 8 +-- src/pipecat/services/deepgram/tts.py | 2 +- src/pipecat/services/elevenlabs/stt.py | 16 ++--- src/pipecat/services/elevenlabs/tts.py | 18 ++--- src/pipecat/services/fal/stt.py | 8 +-- src/pipecat/services/fish/tts.py | 8 +-- src/pipecat/services/gladia/stt.py | 10 +-- .../services/google/gemini_live/llm.py | 5 +- src/pipecat/services/google/llm.py | 24 ++++--- src/pipecat/services/google/stt.py | 16 ++--- src/pipecat/services/google/tts.py | 26 +++---- src/pipecat/services/gradium/stt.py | 8 +-- src/pipecat/services/gradium/tts.py | 8 +-- src/pipecat/services/grok/realtime/llm.py | 28 ++++---- src/pipecat/services/groq/tts.py | 2 +- src/pipecat/services/hathora/stt.py | 2 +- src/pipecat/services/hathora/tts.py | 2 +- src/pipecat/services/inworld/tts.py | 2 +- src/pipecat/services/kokoro/tts.py | 2 +- src/pipecat/services/llm_service.py | 51 +++----------- src/pipecat/services/lmnt/tts.py | 2 +- src/pipecat/services/minimax/tts.py | 2 +- src/pipecat/services/neuphonic/tts.py | 8 +-- src/pipecat/services/nvidia/stt.py | 10 +-- src/pipecat/services/openai/base_llm.py | 2 +- src/pipecat/services/openai/realtime/llm.py | 28 ++++---- src/pipecat/services/openai/stt.py | 10 +-- src/pipecat/services/openai/tts.py | 2 +- .../services/openai_realtime_beta/openai.py | 28 ++++---- src/pipecat/services/playht/tts.py | 2 +- 
src/pipecat/services/resembleai/tts.py | 2 +- src/pipecat/services/rime/tts.py | 16 ++--- src/pipecat/services/sarvam/stt.py | 8 +-- src/pipecat/services/sarvam/tts.py | 10 +-- src/pipecat/services/settings.py | 26 +++---- src/pipecat/services/soniox/stt.py | 10 +-- src/pipecat/services/speechmatics/stt.py | 12 ++-- src/pipecat/services/stt_service.py | 53 ++++---------- src/pipecat/services/tts_service.py | 56 ++++----------- src/pipecat/services/ultravox/llm.py | 4 +- src/pipecat/services/whisper/base_stt.py | 10 +-- src/pipecat/services/whisper/stt.py | 4 +- src/pipecat/services/xtts/tts.py | 2 +- 57 files changed, 291 insertions(+), 407 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index dd12929b9..be3b4e4a7 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2113,13 +2113,13 @@ class TTSStoppedFrame(ControlFrame): class ServiceUpdateSettingsFrame(ControlFrame): """Base frame for updating service settings. - Supports both the legacy ``settings`` dict and the new typed ``update`` - object. When both are provided, ``update`` takes precedence. + Supports both a ``settings`` dict (for backward compatibility) and an + ``update`` object. When both are provided, ``update`` takes precedence. Parameters: settings: Dictionary of setting name to value mappings (legacy). - update: Typed :class:`~pipecat.services.settings.ServiceSettings` - object describing the delta to apply. + update: :class:`~pipecat.services.settings.ServiceSettings` object + describing the delta to apply. 
""" settings: Mapping[str, Any] = field(default_factory=dict) diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index 97b7b6443..2c6c10be4 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -10,7 +10,7 @@ Provides the foundation for all AI services in the Pipecat framework, including model management, settings handling, and frame processing lifecycle methods. """ -from typing import Any, AsyncGenerator, Dict, Mapping, Set +from typing import Any, AsyncGenerator, Dict, Set from loguru import logger @@ -43,7 +43,7 @@ class AIService(FrameProcessor): """ super().__init__(**kwargs) self._model_name: str = "" - self._settings: Dict[str, Any] | ServiceSettings = {} + self._settings: ServiceSettings = ServiceSettings() self._session_properties: Dict[str, Any] = {} @property @@ -97,71 +97,22 @@ class AIService(FrameProcessor): """ pass - async def _update_settings(self, settings: Mapping[str, Any]): - from pipecat.services.openai.realtime.events import SessionProperties + async def _update_settings(self, update: ServiceSettings) -> Set[str]: + """Apply a settings update and return the set of changed field names. - for key, value in settings.items(): - logger.debug("Update request for:", key, value) + The update is applied to ``_settings`` and the changed-field set is + returned. The ``model`` field is handled specially: when it changes, + ``set_model_name`` is called. 
- if key in self._settings: - logger.info(f"Updating LLM setting {key} to: [{value}]") - self._settings[key] = value - elif key in SessionProperties.model_fields: - logger.debug("Attempting to update", key, value) - - try: - from pipecat.services.openai.realtime.events import TurnDetection - - if isinstance(self._session_properties, SessionProperties): - current_properties = self._session_properties - else: - current_properties = SessionProperties(**self._session_properties) - - if key == "turn_detection" and isinstance(value, dict): - turn_detection = TurnDetection(**value) - setattr(current_properties, key, turn_detection) - else: - setattr(current_properties, key, value) - - validated_properties = SessionProperties.model_validate( - current_properties.model_dump() - ) - logger.info(f"Updating LLM setting {key} to: [{value}]") - self._session_properties = validated_properties.model_dump() - except Exception as e: - logger.warning(f"Unexpected error updating session property {key}: {e}") - elif key == "model": - logger.info(f"Updating LLM setting {key} to: [{value}]") - self.set_model_name(value) - else: - logger.warning(f"Unknown setting for {self.name} service: {key}") - - async def _update_settings_from_typed(self, update: ServiceSettings) -> Set[str]: - """Apply a typed settings update and return the set of changed field names. - - If ``_settings`` is a :class:`ServiceSettings` object, the update is - applied to it and the changed-field set is returned. The ``model`` - field is handled specially: when it changes, ``set_model_name`` is - called. - - Services that have been migrated to typed settings should override - this method (calling ``super()``) to react to specific changed fields - (e.g. reconnect on voice change). + Concrete services should override this method (calling ``super()``) + to react to specific changed fields (e.g. reconnect on voice change). Args: - update: A typed settings delta. + update: A settings delta. 
Returns: Set of field names whose values actually changed. """ - if not isinstance(self._settings, ServiceSettings): - logger.warning( - f"{self.name}: received typed settings update but _settings " - f"is not a ServiceSettings — falling back to dict-based update" - ) - await self._update_settings(update.to_dict()) - return set() - changed = self._settings.apply_update(update) if "model" in changed: diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 25611d0d1..159b666d1 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN -from pipecat.services.settings import LLMSettings +from pipecat.services.settings import LLMSettings, is_given from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -72,7 +72,7 @@ except ModuleNotFoundError as e: @dataclass class AnthropicLLMSettings(LLMSettings): - """Typed settings for Anthropic LLM services. + """Settings for Anthropic LLM services. Parameters: enable_prompt_caching: Whether to enable prompt caching. @@ -82,6 +82,18 @@ class AnthropicLLMSettings(LLMSettings): enable_prompt_caching: Any = field(default_factory=lambda: _NOT_GIVEN) thinking: Any = field(default_factory=lambda: _NOT_GIVEN) + @classmethod + def from_mapping(cls, settings): + """Convert a plain dict to settings, coercing thinking dicts. + + For backward compatibility, a ``thinking`` value that is a plain dict + is converted to a :class:`AnthropicLLMService.ThinkingConfig`. 
+ """ + instance = super().from_mapping(settings) + if is_given(instance.thinking) and isinstance(instance.thinking, dict): + instance.thinking = AnthropicLLMService.ThinkingConfig(**instance.thinking) + return instance + @dataclass class AnthropicContextAggregatorPair: diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 910d1e005..2e7b1230b 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -56,7 +56,7 @@ except ModuleNotFoundError as e: @dataclass class AssemblyAISTTSettings(STTSettings): - """Typed settings for the AssemblyAI STT service. + """Settings for the AssemblyAI STT service. See :class:`AssemblyAIConnectionParams` for detailed parameter descriptions. @@ -184,8 +184,8 @@ class AssemblyAISTTService(WebsocketSTTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update and reconnect if anything changed. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update and reconnect if anything changed. Any change triggers a WebSocket reconnect since all connection parameters are encoded in the WebSocket URL. @@ -196,7 +196,7 @@ class AssemblyAISTTService(WebsocketSTTService): Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 2b0740956..05ba14113 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -76,7 +76,7 @@ def language_to_async_language(language: Language) -> Optional[str]: @dataclass class AsyncAITTSSettings(TTSSettings): - """Typed settings for Async AI TTS services. + """Settings for Async AI TTS services. 
Parameters: output_container: Audio container format (e.g. "raw"). diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 50de0de2c..3fca8e374 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -73,7 +73,7 @@ except ModuleNotFoundError as e: @dataclass class AWSBedrockLLMSettings(LLMSettings): - """Typed settings for AWS Bedrock LLM services. + """Settings for AWS Bedrock LLM services. Parameters: latency: Performance mode - "standard" or "optimized". diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index cd8a7103c..6a91c2973 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -47,7 +47,7 @@ except ModuleNotFoundError as e: @dataclass class AWSTranscribeSTTSettings(STTSettings): - """Typed settings for the AWS Transcribe STT service. + """Settings for the AWS Transcribe STT service. Parameters: sample_rate: Audio sample rate in Hz (8000 or 16000). @@ -140,13 +140,13 @@ class AWSTranscribeSTTService(WebsocketSTTService): } return encoding_map.get(encoding, encoding) - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, reconnecting if needed. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, reconnecting if needed. Any change to connection-relevant settings (model, language, etc.) triggers a WebSocket reconnect so the new configuration takes effect. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed and self._websocket: await self._disconnect() diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index b7f6386ca..47c524196 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -125,7 +125,7 @@ def language_to_aws_language(language: Language) -> Optional[str]: @dataclass class AWSPollyTTSSettings(TTSSettings): - """Typed settings for AWS Polly TTS service. + """Settings for AWS Polly TTS service. Parameters: engine: TTS engine to use ('standard', 'neural', etc.). diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 95840bde9..319296a47 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -52,7 +52,7 @@ except ModuleNotFoundError as e: @dataclass class AzureSTTSettings(STTSettings): - """Typed settings for the Azure STT service. + """Settings for the Azure STT service. Parameters: region: Azure region for the Speech service. @@ -123,13 +123,13 @@ class AzureSTTService(STTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, reconfiguring the recognizer if needed. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, reconfiguring the recognizer if needed. When ``language`` changes the ``SpeechConfig`` is updated and the speech recognizer is restarted so that the new language takes effect. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if "language" in changed: # Convert Language enum to Azure language code if needed. 
diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index ba6d8ac13..b72b33901 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -69,7 +69,7 @@ def sample_rate_to_output_format(sample_rate: int) -> SpeechSynthesisOutputForma @dataclass class AzureTTSSettings(TTSSettings): - """Typed settings for Azure TTS services. + """Settings for Azure TTS services. Parameters: emphasis: Emphasis level for speech ("strong", "moderate", "reduced"). diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index c484a3c80..4176b4413 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -137,7 +137,7 @@ def _get_aligned_audio(buffer: bytes) -> tuple[bytes, bytes]: @dataclass class CambTTSSettings(TTSSettings): - """Typed settings for Camb.ai TTS service. + """Settings for Camb.ai TTS service. Parameters: user_instructions: Custom instructions for mars-instruct model only. diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 7f684f886..5116965ec 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -46,7 +46,7 @@ except ModuleNotFoundError as e: @dataclass class CartesiaSTTSettings(STTSettings): - """Typed settings for the Cartesia STT service. + """Settings for the Cartesia STT service. Parameters: encoding: Audio encoding format (e.g. ``"pcm_s16le"``). @@ -294,8 +294,8 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update and reconnect if anything changed. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update and reconnect if anything changed. Args: update: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. 
@@ -303,7 +303,7 @@ class CartesiaSTTService(WebsocketSTTService): Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 117853e36..cff365443 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -195,7 +195,7 @@ class CartesiaEmotion(str, Enum): @dataclass class CartesiaTTSSettings(TTSSettings): - """Typed settings for Cartesia TTS services. + """Settings for Cartesia TTS services. Parameters: output_container: Audio container format (e.g. "raw"). diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 2beaec80c..32759069b 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -49,7 +49,7 @@ except ModuleNotFoundError as e: @dataclass class DeepgramSTTSettings(STTSettings): - """Typed settings for the Deepgram STT service. + """Settings for the Deepgram STT service. Parameters: live_options: Deepgram ``LiveOptions`` for detailed configuration. @@ -195,8 +195,8 @@ class DeepgramSTTService(STTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, keeping ``live_options`` in sync. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. 
When they are given in *update* their values are propagated into @@ -213,7 +213,7 @@ class DeepgramSTTService(STTService): getattr(update, "language", NOT_GIVEN) ) - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 68ec9651b..e503592d7 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -51,7 +51,7 @@ except ModuleNotFoundError as e: @dataclass class DeepgramSageMakerSTTSettings(STTSettings): - """Typed settings for the Deepgram SageMaker STT service. + """Settings for the Deepgram SageMaker STT service. Parameters: live_options: Deepgram ``LiveOptions`` for detailed configuration. @@ -163,8 +163,8 @@ class DeepgramSageMakerSTTService(STTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, keeping ``live_options`` in sync. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When they change their values are propagated into ``live_options``. @@ -178,7 +178,7 @@ class DeepgramSageMakerSTTService(STTService): getattr(update, "language", NOT_GIVEN) ) - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 7ae0fb9ac..bccbace26 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -47,7 +47,7 @@ except ModuleNotFoundError as e: @dataclass class DeepgramTTSSettings(TTSSettings): - """Typed settings for Deepgram TTS service. 
+ """Settings for Deepgram TTS service. Parameters: encoding: Audio encoding format (linear16, mulaw, alaw). diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index b33fee710..d2b7a8f99 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -171,7 +171,7 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]: @dataclass class ElevenLabsSTTSettings(STTSettings): - """Typed settings for the ElevenLabs file-based STT service. + """Settings for the ElevenLabs file-based STT service. Parameters: tag_audio_events: Whether to include audio event tags in transcription. @@ -182,7 +182,7 @@ class ElevenLabsSTTSettings(STTSettings): @dataclass class ElevenLabsRealtimeSTTSettings(STTSettings): - """Typed settings for the ElevenLabs Realtime STT service. + """Settings for the ElevenLabs Realtime STT service. See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions. @@ -294,8 +294,8 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update. Converts language to ElevenLabs format before applying and keeps ``_model_id`` in sync with the model setting. @@ -312,7 +312,7 @@ class ElevenLabsSTTService(SegmentedSTTService): if converted is not None: update.language = converted - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if "model" in changed: self._model_id = self._settings.model @@ -543,8 +543,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update and reconnect if anything changed. 
+ async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update and reconnect if anything changed. Converts language to ElevenLabs format before applying and keeps ``_model_id`` in sync. @@ -561,7 +561,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): if converted is not None: update.language = converted - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index cd3a99e1e..9643fa6ba 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -154,7 +154,7 @@ def build_elevenlabs_voice_settings( """Build voice settings dictionary for ElevenLabs based on provided settings. Args: - settings: Dictionary or typed settings containing voice settings parameters. + settings: Dictionary or settings containing voice settings parameters. Returns: Dictionary of voice settings or None if no valid settings are provided. @@ -186,7 +186,7 @@ class PronunciationDictionaryLocator(BaseModel): @dataclass class ElevenLabsTTSSettings(TTSSettings): - """Typed settings for the ElevenLabs WebSocket TTS service. + """Settings for the ElevenLabs WebSocket TTS service. Fields that appear in the WebSocket URL (``voice``, ``model``, ``language``) require a full reconnect when changed. Fields that @@ -230,7 +230,7 @@ class ElevenLabsTTSSettings(TTSSettings): @dataclass class ElevenLabsHttpTTSSettings(TTSSettings): - """Typed settings for the ElevenLabs HTTP TTS service. + """Settings for the ElevenLabs HTTP TTS service. Parameters: optimize_streaming_latency: Latency optimization level (0-4). 
@@ -471,8 +471,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService): voice_settings[key] = val return voice_settings or None - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update, reconnecting as needed. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update, reconnecting as needed. Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS`` sets on :class:`ElevenLabsTTSSettings` to decide whether to @@ -484,7 +484,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed @@ -958,8 +958,8 @@ class ElevenLabsHttpTTSService(WordTTSService): def _set_voice_settings(self): return build_elevenlabs_voice_settings(self._settings) - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and rebuild voice settings. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and rebuild voice settings. Args: update: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. @@ -967,7 +967,7 @@ class ElevenLabsHttpTTSService(WordTTSService): Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed: self._voice_settings = self._set_voice_settings() return changed diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index a459d15dd..ff6628f6c 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -150,7 +150,7 @@ def language_to_fal_language(language: Language) -> Optional[str]: @dataclass class FalSTTSettings(STTSettings): - """Typed settings for the Fal Wizper STT service. + """Settings for the Fal Wizper STT service. Parameters: task: Task to perform ('transcribe' or 'translate'). Defaults to @@ -251,9 +251,9 @@ class FalSTTService(SegmentedSTTService): """ return language_to_fal_language(language) - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, converting language if changed.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, converting language if changed.""" + changed = await super()._update_settings(update) if "language" in changed: # Convert the Language enum to a Fal language code. diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 5c56d0c91..daa884af8 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -49,7 +49,7 @@ FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"] @dataclass class FishAudioTTSSettings(TTSSettings): - """Typed settings for Fish Audio TTS service. + """Settings for Fish Audio TTS service. Parameters: fish_sample_rate: Audio sample rate sent to the API. @@ -184,8 +184,8 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and reconnect if needed. 
+ async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and reconnect if needed. Any change to voice or model triggers a WebSocket reconnect. @@ -195,7 +195,7 @@ class FishAudioTTSService(InterruptibleTTSService): Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index d56150a29..bb8f05e61 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -182,7 +182,7 @@ class _InputParamsDescriptor: @dataclass class GladiaSTTSettings(STTSettings): - """Typed settings for Gladia STT service. + """Settings for Gladia STT service. Parameters: input_params: Gladia ``GladiaInputParams`` for detailed configuration. @@ -379,19 +379,19 @@ class GladiaSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings_from_typed(self, update: GladiaSTTSettings) -> set[str]: - """Apply typed settings update. + async def _update_settings(self, update: GladiaSTTSettings) -> set[str]: + """Apply settings update. Gladia sessions are fixed at creation time, so any change requires a full session teardown and reconnect. Args: - update: A typed settings delta. + update: A settings delta. Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 2e8a2efbd..7a7aed08c 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -604,7 +604,7 @@ class InputParams(BaseModel): @dataclass class GeminiLiveLLMSettings(LLMSettings): - """Typed settings for Gemini Live LLM services. + """Settings for Gemini Live LLM services. Parameters: modalities: Response modalities. @@ -976,7 +976,8 @@ class GeminiLiveLLMService(LLMService): # (we have an example that does just that, actually). await self._create_single_response(frame.messages) elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + # TODO: implement runtime tool updates for Gemini Live. + pass else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 7f6dd724c..bf1958f66 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -58,7 +58,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, is_given from pipecat.utils.tracing.service_decorators import traced_llm # Suppress gRPC fork warnings @@ -675,7 +675,7 @@ class GoogleLLMContext(OpenAILLMContext): @dataclass class GoogleLLMSettings(LLMSettings): - """Typed settings for Google LLM services. + """Settings for Google LLM services. Parameters: thinking: Thinking configuration. 
@@ -683,6 +683,18 @@ class GoogleLLMSettings(LLMSettings): thinking: Any = field(default_factory=lambda: NOT_GIVEN) + @classmethod + def from_mapping(cls, settings): + """Convert a plain dict to settings, coercing thinking dicts. + + For backward compatibility, a ``thinking`` value that is a plain dict + is converted to a :class:`GoogleLLMService.ThinkingConfig`. + """ + instance = super().from_mapping(settings) + if is_given(instance.thinking) and isinstance(instance.thinking, dict): + instance.thinking = GoogleLLMService.ThinkingConfig(**instance.thinking) + return instance + class GoogleLLMService(LLMService): """Google AI (Gemini) LLM service implementation. @@ -1227,14 +1239,6 @@ class GoogleLLMService(LLMService): # Do nothing - we're shutting down anyway pass - async def _update_settings(self, settings): - """Override to handle ThinkingConfig validation.""" - # Convert thinking dict to ThinkingConfig if needed - if "thinking" in settings and isinstance(settings["thinking"], dict): - settings = dict(settings) # Make a copy to avoid modifying the original - settings["thinking"] = self.ThinkingConfig(**settings["thinking"]) - await super()._update_settings(settings) - def create_context_aggregator( self, context: OpenAILLMContext, diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 585b766f4..d4ffb0d91 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -360,7 +360,7 @@ def language_to_google_stt_language(language: Language) -> Optional[str]: @dataclass class GoogleSTTSettings(STTSettings): - """Typed settings for Google Cloud Speech-to-Text V2. + """Settings for Google Cloud Speech-to-Text V2. 
Parameters: languages: List of ``Language`` enums for recognition @@ -628,10 +628,10 @@ class GoogleSTTService(STTService): DeprecationWarning, ) logger.debug(f"Switching STT languages to: {languages}") - await self._update_settings_from_typed(GoogleSTTSettings(languages=list(languages))) + await self._update_settings(GoogleSTTSettings(languages=list(languages))) - async def _update_settings_from_typed(self, update: GoogleSTTSettings) -> set[str]: - """Apply typed settings update and reconnect if anything changed. + async def _update_settings(self, update: GoogleSTTSettings) -> set[str]: + """Apply settings update and reconnect if anything changed. Handles ``language`` from base ``set_language`` by converting it to ``languages``. Emits a deprecation warning if ``language_codes`` is @@ -639,7 +639,7 @@ class GoogleSTTService(STTService): Reconnects the stream on any change. Args: - update: A typed settings delta. + update: A settings delta. Returns: Set of field names whose values actually changed. @@ -663,7 +663,7 @@ class GoogleSTTService(STTService): stacklevel=2, ) - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed: await self._reconnect_if_needed() @@ -742,7 +742,7 @@ class GoogleSTTService(STTService): "GoogleSTTSettings(...) 
instead.", DeprecationWarning, ) - # Build a typed settings delta from the provided options + # Build a settings delta from the provided options update = GoogleSTTSettings() if languages is not None: @@ -770,7 +770,7 @@ class GoogleSTTService(STTService): logger.debug(f"Updating location to: {location}") self._location = location - await self._update_settings_from_typed(update) + await self._update_settings(update) async def _connect(self): """Initialize streaming recognition config and stream.""" diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 2b9ada224..9769aa665 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -478,7 +478,7 @@ def language_to_gemini_tts_language(language: Language) -> Optional[str]: @dataclass class GoogleHttpTTSSettings(TTSSettings): - """Typed settings for Google HTTP TTS service. + """Settings for Google HTTP TTS service. Parameters: pitch: Voice pitch adjustment (e.g., "+2st", "-50%"). @@ -505,7 +505,7 @@ class GoogleHttpTTSSettings(TTSSettings): @dataclass class GoogleStreamTTSSettings(TTSSettings): - """Typed settings for Google streaming TTS service. + """Settings for Google streaming TTS service. Parameters: language: Language for synthesis. Defaults to English. @@ -518,7 +518,7 @@ class GoogleStreamTTSSettings(TTSSettings): @dataclass class GeminiTTSSettings(TTSSettings): - """Typed settings for Gemini TTS service. + """Settings for Gemini TTS service. Parameters: language: Language for synthesis. Defaults to English. @@ -680,11 +680,11 @@ class GoogleHttpTTSService(TTSService): """ return language_to_google_tts_language(language) - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> set[str]: """Override to handle speaking_rate validation. Args: - update: Typed settings delta. Can include 'speaking_rate' (float). + update: Settings delta. 
Can include 'speaking_rate' (float). """ if isinstance(update, GoogleHttpTTSSettings) and is_given(update.speaking_rate): rate_value = float(update.speaking_rate) @@ -693,7 +693,7 @@ class GoogleHttpTTSService(TTSService): f"Invalid speaking_rate value: {rate_value}. Must be between 0.25 and 2.0" ) update.speaking_rate = NOT_GIVEN - return await super()._update_settings_from_typed(update) + return await super()._update_settings(update) def _construct_ssml(self, text: str) -> str: ssml = "" @@ -1024,11 +1024,11 @@ class GoogleTTSService(GoogleBaseTTSService): credentials, credentials_path ) - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> set[str]: """Override to handle speaking_rate validation. Args: - update: Typed settings delta. Can include 'speaking_rate' (float). + update: Settings delta. Can include 'speaking_rate' (float). """ if isinstance(update, GoogleStreamTTSSettings) and is_given(update.speaking_rate): rate_value = float(update.speaking_rate) @@ -1037,7 +1037,7 @@ class GoogleTTSService(GoogleBaseTTSService): f"Invalid speaking_rate value: {rate_value}. Must be between 0.25 and 2.0" ) update.speaking_rate = NOT_GIVEN - return await super()._update_settings_from_typed(update) + return await super()._update_settings(update) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1259,11 +1259,11 @@ class GeminiTTSService(GoogleBaseTTSService): f"Current rate of {self.sample_rate}Hz may cause issues." ) - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update with voice validation. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update with voice validation. Args: - update: Typed settings delta. Can include 'voice', 'prompt', etc. + update: Settings delta. Can include 'voice', 'prompt', etc. 
Returns: Set of field names whose values actually changed. @@ -1271,7 +1271,7 @@ class GeminiTTSService(GoogleBaseTTSService): if is_given(update.voice) and update.voice not in self.AVAILABLE_VOICES: logger.warning(f"Voice '{update.voice}' not in known voices list. Using anyway.") - return await super()._update_settings_from_typed(update) + return await super()._update_settings(update) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index ff2002625..3b634cbd2 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -68,7 +68,7 @@ def language_to_gradium_language(language: Language) -> Optional[str]: @dataclass class GradiumSTTSettings(STTSettings): - """Typed settings for the Gradium STT service. + """Settings for the Gradium STT service. Parameters: delay_in_frames: Delay in audio frames (80ms each) before text is @@ -171,8 +171,8 @@ class GradiumSTTService(WebsocketSTTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, sync params, and reconnect. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, sync params, and reconnect. Args: update: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. @@ -180,7 +180,7 @@ class GradiumSTTService(WebsocketSTTService): Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 5dc355b91..bc4945bcf 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -41,7 +41,7 @@ SAMPLE_RATE = 48000 @dataclass class GradiumTTSSettings(TTSSettings): - """Typed settings for the Gradium TTS service. + """Settings for the Gradium TTS service. Parameters: output_format: Audio output format. @@ -119,8 +119,8 @@ class GradiumTTSService(InterruptibleWordTTSService): """ return True - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and reconnect if voice changed. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and reconnect if voice changed. Args: update: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. @@ -129,7 +129,7 @@ class GradiumTTSService(InterruptibleWordTTSService): Set of field names whose values actually changed. """ prev_voice = self._voice_id - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if self._voice_id != prev_voice: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index dfe039bbc..f31769774 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -88,7 +88,7 @@ class CurrentAudioResponse: @dataclass class GrokRealtimeLLMSettings(LLMSettings): - """Typed settings for Grok Realtime LLM services. + """Settings for Grok Realtime LLM services. Parameters: session_properties: Grok Realtime session configuration. @@ -349,13 +349,13 @@ class GrokRealtimeLLMService(LLMService): frame: The frame to process. 
direction: The direction of frame flow in the pipeline. """ - # Legacy dict path: frame.settings contains SessionProperties fields, - # not our Settings fields, so we construct SessionProperties directly. - # The new typed path (frame.update) falls through to super, which calls - # _update_settings_from_typed → our override handles the rest. + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. The frame.update path falls through to super, which calls + # _update_settings → our override handles the rest. if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: self._settings.session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) return @@ -379,7 +379,7 @@ class GrokRealtimeLLMService(LLMService): elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) @@ -456,14 +456,14 @@ class GrokRealtimeLLMService(LLMService): return await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings_from_typed(self, update): - """Apply a typed settings update, sending a session update if needed.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update): + """Apply a settings update, sending a session update if needed.""" + changed = await super()._update_settings(update) if "session_properties" in changed: - await self._update_settings() + await self._send_session_update() return changed - async def _update_settings(self): + async def _send_session_update(self): """Update session settings on the server.""" settings = self._settings.session_properties adapter: 
GrokRealtimeLLMAdapter = self.get_llm_adapter() @@ -543,7 +543,7 @@ class GrokRealtimeLLMService(LLMService): async def _handle_evt_conversation_created(self, evt): """Handle conversation.created event - first event after connecting.""" - await self._update_settings() + await self._send_session_update() async def _handle_evt_response_created(self, evt): """Handle response.created event - response generation started.""" @@ -746,7 +746,7 @@ class GrokRealtimeLLMService(LLMService): self._messages_added_manually[evt.item.id] = True await self.send_client_event(evt) - await self._update_settings() + await self._send_session_update() self._llm_needs_conversation_setup = False logger.debug("Creating Grok response") diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 8e551725f..d0b5fbd7c 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -36,7 +36,7 @@ except ModuleNotFoundError as e: @dataclass class GroqTTSSettings(TTSSettings): - """Typed settings for the Groq TTS service. + """Settings for the Groq TTS service. Parameters: output_format: Audio output format. diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index 8af53958d..a620ed79a 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -31,7 +31,7 @@ from .utils import ConfigOption @dataclass class HathoraSTTSettings(STTSettings): - """Typed settings for the Hathora STT service. + """Settings for the Hathora STT service. Parameters: config: Some models support additional config, refer to diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 2b3a5ddb1..f3524734a 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -49,7 +49,7 @@ def _decode_audio_payload( @dataclass class HathoraTTSSettings(TTSSettings): - """Typed settings for Hathora TTS service. + """Settings for Hathora TTS service. 
Parameters: speed: Speech speed multiplier (if supported by model). diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 7f4374b93..2f6a13bd1 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -52,7 +52,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts @dataclass class InworldTTSSettings(TTSSettings): - """Typed settings for Inworld TTS services. + """Settings for Inworld TTS services. Parameters: audio_encoding: Audio encoding format (e.g. LINEAR16). diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 6d46441f6..b88511437 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -91,7 +91,7 @@ def language_to_kokoro_language(language: Language) -> str: @dataclass class KokoroTTSSettings(TTSSettings): - """Typed settings for the Kokoro TTS service. + """Settings for the Kokoro TTS service. Parameters: lang_code: Kokoro language code for synthesis. 
diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 693448cd5..97d49192c 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import LLMSettings, ServiceSettings, is_given +from pipecat.services.settings import LLMSettings, is_given from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationUtil, @@ -313,16 +313,16 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._cancel_sequential_runner_task() await self._cancel_summary_task() - async def _update_settings_from_typed(self, update: LLMSettings) -> set[str]: - """Apply a typed settings update, handling turn-completion fields. + async def _update_settings(self, update: LLMSettings) -> set[str]: + """Apply a settings update, handling turn-completion fields. Args: - update: A typed LLM settings delta. + update: An LLM settings delta. Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if "filter_incomplete_user_turns" in changed: self._filter_incomplete_user_turns = self._settings.filter_incomplete_user_turns @@ -336,35 +336,6 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): return changed - async def _update_settings(self, settings: Mapping[str, Any]): - """Update LLM service settings. - - Handles turn completion settings specially since they are not model - parameters and should not be passed to the underlying LLM API. 
- - Args: - settings: Dictionary of settings to update. - """ - # Turn completion settings to extract (not model parameters) - turn_completion_keys = {"filter_incomplete_user_turns", "user_turn_completion_config"} - - # Handle turn completion settings - if "filter_incomplete_user_turns" in settings: - self._filter_incomplete_user_turns = settings["filter_incomplete_user_turns"] - logger.info( - f"{self}: Incomplete turn filtering {'enabled' if self._filter_incomplete_user_turns else 'disabled'}" - ) - - # Configure the mixin with config object - if self._filter_incomplete_user_turns and "user_turn_completion_config" in settings: - self.set_user_turn_completion_config(settings["user_turn_completion_config"]) - - # Remove turn completion settings before passing to parent - settings = {k: v for k, v in settings.items() if k not in turn_completion_keys} - - # Let the parent handle remaining model parameters - await super()._update_settings(settings) - async def process_frame(self, frame: Frame, direction: FrameDirection): """Process a frame. @@ -379,16 +350,12 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): elif isinstance(frame, LLMConfigureOutputFrame): self._skip_tts = frame.skip_tts elif isinstance(frame, LLMUpdateSettingsFrame): - # New path: typed settings update object. if frame.update is not None: - await self._update_settings_from_typed(frame.update) - # Legacy path: plain dict, but service uses typed settings — convert. - elif isinstance(self._settings, ServiceSettings): + await self._update_settings(frame.update) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. update = type(self._settings).from_mapping(frame.settings) - await self._update_settings_from_typed(update) - # Legacy path: plain dict, service still uses dict-based settings. 
- else: - await self._update_settings(frame.settings) + await self._update_settings(update) elif isinstance(frame, LLMContextSummaryRequestFrame): await self._handle_summary_request(frame) diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 9fa727c5f..1b23c8ae2 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -75,7 +75,7 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: @dataclass class LmntTTSSettings(TTSSettings): - """Typed settings for LMNT TTS service. + """Settings for LMNT TTS service. Parameters: format: Audio output format. Defaults to "raw". diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index db236d6e4..ab04925f3 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -89,7 +89,7 @@ def language_to_minimax_language(language: Language) -> Optional[str]: @dataclass class MiniMaxTTSSettings(TTSSettings): - """Typed settings for MiniMax TTS service. + """Settings for MiniMax TTS service. Parameters: stream: Whether to use streaming mode. diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index de92c48a2..0680de4f6 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -76,7 +76,7 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]: @dataclass class NeuphonicTTSSettings(TTSSettings): - """Typed settings for Neuphonic TTS service. + """Settings for Neuphonic TTS service. Parameters: lang_code: Neuphonic language code. 
@@ -181,9 +181,9 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return language_to_neuphonic_lang_code(language) - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and reconnect with new configuration.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and reconnect with new configuration.""" + changed = await super()._update_settings(update) if changed: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 6190e169d..ff76a6900 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -93,14 +93,14 @@ def language_to_nvidia_riva_language(language: Language) -> Optional[str]: @dataclass class NvidiaSTTSettings(STTSettings): - """Typed settings for the NVIDIA Riva streaming STT service.""" + """Settings for the NVIDIA Riva streaming STT service.""" pass @dataclass class NvidiaSegmentedSTTSettings(STTSettings): - """Typed settings for the NVIDIA Riva segmented STT service. + """Settings for the NVIDIA Riva segmented STT service. Parameters: profanity_filter: Whether to filter profanity from results. @@ -579,8 +579,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = self._create_recognition_config() logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self.model_name}") - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update and sync internal state. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update and sync internal state. Args: update: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. 
@@ -588,7 +588,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): Returns: Set of field names whose values actually changed. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if changed: self._config = self._create_recognition_config() diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 454087508..13cbc07cb 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -49,7 +49,7 @@ from pipecat.utils.tracing.service_decorators import traced_llm @dataclass class OpenAILLMSettings(LLMSettings): - """Typed settings for OpenAI-compatible LLM services. + """Settings for OpenAI-compatible LLM services. Parameters: max_completion_tokens: Maximum completion tokens to generate. diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 825639950..f6e8b1646 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -93,7 +93,7 @@ class CurrentAudioResponse: @dataclass class OpenAIRealtimeLLMSettings(LLMSettings): - """Typed settings for OpenAI Realtime LLM services. + """Settings for OpenAI Realtime LLM services. Parameters: session_properties: OpenAI Realtime session configuration. @@ -411,13 +411,13 @@ class OpenAIRealtimeLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ - # Legacy dict path: frame.settings contains SessionProperties fields, - # not our Settings fields, so we construct SessionProperties directly. - # The new typed path (frame.update) falls through to super, which calls - # _update_settings_from_typed → our override handles the rest. + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. 
The frame.update path falls through to super, which calls + # _update_settings → our override handles the rest. if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: self._settings.session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) return @@ -449,7 +449,7 @@ class OpenAIRealtimeLLMService(LLMService): elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) @@ -534,14 +534,14 @@ class OpenAIRealtimeLLMService(LLMService): # treat a send-side error as fatal. await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings_from_typed(self, update): - """Apply a typed settings update, sending a session update if needed.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update): + """Apply a settings update, sending a session update if needed.""" + changed = await super()._update_settings(update) if "session_properties" in changed: - await self._update_settings() + await self._send_session_update() return changed - async def _update_settings(self): + async def _send_session_update(self): settings = self._settings.session_properties adapter: OpenAIRealtimeLLMAdapter = self.get_llm_adapter() @@ -613,7 +613,7 @@ class OpenAIRealtimeLLMService(LLMService): async def _handle_evt_session_created(self, evt): # session.created is received right after connecting. Send a message # to configure the session properties. 
- await self._update_settings() + await self._send_session_update() async def _handle_evt_session_updated(self, evt): # If this is our first context frame, run the LLM @@ -896,7 +896,7 @@ class OpenAIRealtimeLLMService(LLMService): await self.send_client_event(evt) # Send new settings if needed - await self._update_settings() + await self._send_session_update() # We're done configuring the LLM for this session self._llm_needs_conversation_setup = False diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 266b2964b..458f40133 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -127,7 +127,7 @@ _OPENAI_SAMPLE_RATE = 24000 @dataclass class OpenAIRealtimeSTTSettings(STTSettings): - """Typed settings for the OpenAI Realtime STT service. + """Settings for the OpenAI Realtime STT service. Parameters: prompt: Optional prompt text to guide transcription style. @@ -268,10 +268,10 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update and send session update if needed. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update and send session update if needed. - Keeps ``_language_code`` and ``_prompt`` in sync with typed settings + Keeps ``_language_code`` and ``_prompt`` in sync with settings and sends a ``session.update`` to the server when the session is active. Args: @@ -280,7 +280,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index 3572d287e..f283a7912 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -64,7 +64,7 @@ VALID_VOICES: Dict[str, ValidVoice] = { @dataclass class OpenAITTSSettings(TTSSettings): - """Typed settings for OpenAI TTS service. + """Settings for OpenAI TTS service. Parameters: instructions: Instructions to guide voice synthesis behavior. diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index b7703ad98..b456ed0b8 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -94,7 +94,7 @@ class CurrentAudioResponse: @dataclass class OpenAIRealtimeBetaLLMSettings(LLMSettings): - """Typed settings for OpenAI Realtime Beta LLM services. + """Settings for OpenAI Realtime Beta LLM services. Parameters: session_properties: OpenAI Realtime session configuration. @@ -357,13 +357,13 @@ class OpenAIRealtimeBetaLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ - # Legacy dict path: frame.settings contains SessionProperties fields, - # not our Settings fields, so we construct SessionProperties directly. - # The new typed path (frame.update) falls through to super, which calls - # _update_settings_from_typed → our override handles the rest. + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. The frame.update path falls through to super, which calls + # _update_settings → our override handles the rest. 
if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: self._settings.session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) return @@ -403,7 +403,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): elif isinstance(frame, RealtimeMessagesUpdateFrame): self._context = frame.context elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() elif isinstance(frame, RealtimeFunctionCallResultFrame): await self._handle_function_call_result(frame.result_frame) @@ -478,14 +478,14 @@ class OpenAIRealtimeBetaLLMService(LLMService): # treat a send-side error as fatal. await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings_from_typed(self, update): - """Apply a typed settings update, sending a session update if needed.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update): + """Apply a settings update, sending a session update if needed.""" + changed = await super()._update_settings(update) if "session_properties" in changed: - await self._update_settings() + await self._send_session_update() return changed - async def _update_settings(self): + async def _send_session_update(self): settings = self._settings.session_properties # tools given in the context override the tools in the session properties if self._context and self._context.tools: @@ -540,7 +540,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_evt_session_created(self, evt): # session.created is received right after connecting. Send a message # to configure the session properties. 
- await self._update_settings() + await self._send_session_update() async def _handle_evt_session_updated(self, evt): # If this is our first context frame, run the LLM @@ -779,7 +779,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): self._context.llm_needs_initial_messages = False if self._context.llm_needs_settings_update: - await self._update_settings() + await self._send_session_update() self._context.llm_needs_settings_update = False logger.debug(f"Creating response: {self._context.get_messages_for_logging()}") diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index 7d61ffd87..b63e5d648 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -101,7 +101,7 @@ def language_to_playht_language(language: Language) -> Optional[str]: @dataclass class PlayHTTTSSettings(TTSSettings): - """Typed settings for PlayHT TTS services. + """Settings for PlayHT TTS services. Parameters: output_format: Audio output format. diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index cd3da1767..08df23abe 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -42,7 +42,7 @@ except ModuleNotFoundError as e: @dataclass class ResembleAITTSSettings(TTSSettings): - """Typed settings for Resemble AI TTS service. + """Settings for Resemble AI TTS service. Parameters: precision: PCM bit depth (PCM_32, PCM_24, PCM_16, or MULAW). diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 4af9fe63b..5f8a5cef6 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -72,7 +72,7 @@ def language_to_rime_language(language: Language) -> str: @dataclass class RimeTTSSettings(TTSSettings): - """Typed settings for Rime WS JSON and HTTP TTS services. + """Settings for Rime WS JSON and HTTP TTS services. Parameters: speaker: Voice speaker ID. 
@@ -101,7 +101,7 @@ class RimeTTSSettings(TTSSettings): @dataclass class RimeNonJsonTTSSettings(TTSSettings): - """Typed settings for Rime non-JSON WS TTS service. + """Settings for Rime non-JSON WS TTS service. Parameters: speaker: Voice speaker ID. @@ -271,10 +271,10 @@ class RimeTTSService(AudioContextWordTTSService): self._extra_msg_fields["inlineSpeedAlpha"] = ",".join(speed_vals + [str(speed)]) return f"[{text}]" - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and reconnect if voice changed.""" + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and reconnect if voice changed.""" prev_voice = self._voice_id - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if "voice" in changed: self._settings.speaker = self._voice_id await self._disconnect() @@ -975,13 +975,13 @@ class RimeNonJsonTTSService(InterruptibleTTSService): except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and reconnect if necessary. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) # Sync voice and model to settings dict fields if "voice" in changed: diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 834171b32..68427fbc4 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -133,7 +133,7 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = { @dataclass class SarvamSTTSettings(STTSettings): - """Typed settings for the Sarvam STT service. + """Settings for the Sarvam STT service. Parameters: prompt: Optional prompt to guide transcription/translation style. @@ -306,8 +306,8 @@ class SarvamSTTService(STTService): if self._socket_client: await self._socket_client.flush() - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, validate, sync state, and reconnect. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, validate, sync state, and reconnect. Args: update: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. @@ -336,7 +336,7 @@ class SarvamSTTService(STTService): if not self._config.supports_mode: raise ValueError(f"Model '{self.model_name}' does not support mode parameter.") - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index ec51a85d5..56ce3bef0 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -247,7 +247,7 @@ def language_to_sarvam_language(language: Language) -> Optional[str]: @dataclass class SarvamHttpTTSSettings(TTSSettings): - """Typed settings for Sarvam HTTP TTS service. + """Settings for Sarvam HTTP TTS service. Parameters: language: Sarvam language code. 
@@ -277,7 +277,7 @@ class SarvamHttpTTSSettings(TTSSettings): @dataclass class SarvamWSTTSSettings(TTSSettings): - """Typed settings for Sarvam WebSocket TTS service. + """Settings for Sarvam WebSocket TTS service. Parameters: target_language_code: Sarvam language code. @@ -953,9 +953,9 @@ class SarvamTTSService(InterruptibleTTSService): if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await self.flush_audio() - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed settings update and resend config if voice changed.""" - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update and resend config if voice changed.""" + changed = await super()._update_settings(update) if "voice" in changed: await self._send_config() return changed diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 718996984..d63e1f539 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -4,13 +4,12 @@ # SPDX-License-Identifier: BSD 2-Clause License # -"""Typed settings infrastructure for Pipecat AI services. +"""Settings infrastructure for Pipecat AI services. -This module provides typed dataclass-based settings objects that replace the -stringly-typed ``Mapping[str, Any]`` dictionaries previously used for service -configuration. Each service type has a corresponding settings class (e.g. -``TTSSettings``, ``LLMSettings``) whose fields use the ``NOT_GIVEN`` sentinel -to distinguish "leave unchanged" from an explicit ``None``. +This module provides dataclass-based settings objects for service configuration. +Each service type has a corresponding settings class (e.g. ``TTSSettings``, +``LLMSettings``) whose fields use the ``NOT_GIVEN`` sentinel to distinguish +"leave unchanged" from an explicit ``None``. 
Key concepts: @@ -21,7 +20,7 @@ Key concepts: ``NOT_GIVEN`` are simply skipped when applying an update. - **apply_update**: Applies a delta onto a target settings object and returns the set of field names that actually changed. -- **from_mapping**: Constructs a typed settings object from a plain dict, +- **from_mapping**: Constructs a settings object from a plain dict, supporting field aliases (e.g. ``"voice_id"`` → ``"voice"``). - **Extras**: Unknown keys land in the ``extra`` dict so services that have non-standard settings don't lose data. @@ -91,7 +90,7 @@ _S = TypeVar("_S", bound="ServiceSettings") @dataclass class ServiceSettings: - """Base class for typed service settings. + """Base class for service settings. Every AI service type (LLM, TTS, STT) extends this with its own fields. Fields default to ``NOT_GIVEN`` so that an instance can represent either @@ -188,7 +187,10 @@ class ServiceSettings: @classmethod def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: - """Construct a typed settings object from a plain dictionary. + """Construct a settings object from a plain dictionary. + + This exists for backward compatibility with code that passes plain + dicts via ``*UpdateSettingsFrame(settings={...})``. Keys are matched to dataclass fields by name. Keys listed in ``_aliases`` are translated to their canonical name first. Any @@ -250,7 +252,7 @@ class ServiceSettings: @dataclass class LLMSettings(ServiceSettings): - """Typed settings for LLM services. + """Settings for LLM services. Parameters: model: LLM model identifier. @@ -285,7 +287,7 @@ class LLMSettings(ServiceSettings): @dataclass class TTSSettings(ServiceSettings): - """Typed settings for TTS services. + """Settings for TTS services. Parameters: model: TTS model identifier. @@ -301,7 +303,7 @@ class TTSSettings(ServiceSettings): @dataclass class STTSettings(ServiceSettings): - """Typed settings for STT services. + """Settings for STT services. Parameters: model: STT model identifier. 
diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index c3d9638c1..a74a14ee9 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -138,7 +138,7 @@ def _prepare_language_hints( @dataclass class SonioxSTTSettings(STTSettings): - """Typed settings for Soniox STT service. + """Settings for Soniox STT service. Parameters: input_params: Soniox ``SonioxInputParams`` for detailed configuration. @@ -217,8 +217,8 @@ class SonioxSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings_from_typed(self, update: SonioxSTTSettings) -> set[str]: - """Apply a typed settings update, keeping ``input_params`` in sync. + async def _update_settings(self, update: SonioxSTTSettings) -> set[str]: + """Apply a settings update, keeping ``input_params`` in sync. Top-level ``model`` is the source of truth. When it is given in *update* its value is propagated into ``input_params``. When only @@ -228,14 +228,14 @@ class SonioxSTTService(WebsocketSTTService): Any change triggers a WebSocket reconnect. Args: - update: A typed settings delta. + update: A settings delta. Returns: Set of field names whose values actually changed. """ model_given = is_given(getattr(update, "model", NOT_GIVEN)) - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 462c2fc6f..5bacc208a 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -85,7 +85,7 @@ class TurnDetectionMode(str, Enum): @dataclass class SpeechmaticsSTTSettings(STTSettings): - """Typed settings for Speechmatics STT service. + """Settings for Speechmatics STT service. See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field. 
@@ -415,7 +415,7 @@ class SpeechmaticsSTTService(STTService): ) speaker_passive_format = params.speaker_passive_format or speaker_active_format - # Typed settings — seeded from InputParams + # Settings — seeded from InputParams self._settings = SpeechmaticsSTTSettings( language=params.language, domain=params.domain, @@ -480,8 +480,8 @@ class SpeechmaticsSTTService(STTService): await super().start(frame) await self._connect() - async def _update_settings_from_typed(self, update: SpeechmaticsSTTSettings) -> set[str]: - """Apply typed settings update, reconnecting only when necessary. + async def _update_settings(self, update: SpeechmaticsSTTSettings) -> set[str]: + """Apply settings update, reconnecting only when necessary. Fields are classified into three categories (see ``SpeechmaticsSTTSettings``): @@ -494,12 +494,12 @@ class SpeechmaticsSTTService(STTService): time and therefore require a full disconnect / reconnect. Args: - update: A typed settings delta. + update: A settings delta. Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if not changed: return changed diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 3ce143d92..e92095297 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -12,7 +12,7 @@ import time import warnings import wave from abc import abstractmethod -from typing import Any, AsyncGenerator, Dict, Mapping, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from websockets.protocol import State @@ -35,7 +35,7 @@ from pipecat.frames.frames import ( from pipecat.metrics.metrics import TTFBMetricsData from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import ServiceSettings, STTSettings +from pipecat.services.settings import STTSettings from pipecat.services.stt_latency import DEFAULT_TTFS_P99 from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language @@ -183,11 +183,8 @@ class STTService(AIService): stacklevel=2, ) logger.info(f"Switching STT model to: [{model}]") - if isinstance(self._settings, ServiceSettings): - settings_cls = type(self._settings) - await self._update_settings_from_typed(settings_cls(model=model)) - else: - self.set_model_name(model) + settings_cls = type(self._settings) + await self._update_settings(settings_cls(model=model)) async def set_language(self, language: Language): """Set the language for speech recognition. 
@@ -206,11 +203,8 @@ class STTService(AIService): stacklevel=2, ) logger.info(f"Switching STT language to: [{language}]") - if isinstance(self._settings, ServiceSettings): - settings_cls = type(self._settings) - await self._update_settings_from_typed(settings_cls(language=language)) - else: - pass + settings_cls = type(self._settings) + await self._update_settings(settings_cls(language=language)) @abstractmethod async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: @@ -242,23 +236,8 @@ class STTService(AIService): await super().cleanup() await self._cancel_ttfb_timeout() - async def _update_settings(self, settings: Mapping[str, Any]): - logger.info(f"Updating STT settings: {self._settings}") - for key, value in settings.items(): - if key in self._settings: - logger.info(f"Updating STT setting {key} to: [{value}]") - self._settings[key] = value - if key == "language": - await self.set_language(value) - elif key == "language": - await self.set_language(value) - elif key == "model": - self.set_model_name(value) - else: - logger.warning(f"Unknown setting for STT service: {key}") - - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed STT settings update. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply an STT settings update. Handles ``model`` (via parent). Does **not** call ``set_language`` — concrete services should override this method and handle language @@ -266,12 +245,12 @@ class STTService(AIService): changed-field set. Args: - update: A typed STT settings delta. + update: An STT settings delta. Returns: Set of field names whose values actually changed. 
""" - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) return changed async def process_audio_frame(self, frame: AudioRawFrame, direction: FrameDirection): @@ -335,16 +314,12 @@ class STTService(AIService): await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, STTUpdateSettingsFrame): - # New path: typed settings update object. if frame.update is not None: - await self._update_settings_from_typed(frame.update) - # Legacy path: plain dict, but service uses typed settings — convert. - elif isinstance(self._settings, ServiceSettings): + await self._update_settings(frame.update) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. update = type(self._settings).from_mapping(frame.settings) - await self._update_settings_from_typed(update) - # Legacy path: plain dict, service still uses dict-based settings. - else: - await self._update_settings(frame.settings) + await self._update_settings(update) elif isinstance(frame, STTMuteFrame): self._muted = frame.mute logger.debug(f"STT service {'muted' if frame.mute else 'unmuted'}") diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index b16ebdb24..a696e538d 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -19,7 +19,6 @@ from typing import ( Callable, Dict, List, - Mapping, Optional, Sequence, Tuple, @@ -53,7 +52,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import ServiceSettings, TTSSettings, is_given +from pipecat.services.settings import TTSSettings, is_given from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ 
-280,11 +279,8 @@ class TTSService(AIService): stacklevel=2, ) logger.info(f"Switching TTS model to: [{model}]") - if isinstance(self._settings, ServiceSettings): - settings_cls = type(self._settings) - await self._update_settings_from_typed(settings_cls(model=model)) - else: - self.set_model_name(model) + settings_cls = type(self._settings) + await self._update_settings(settings_cls(model=model)) async def set_voice(self, voice: str): """Set the voice for speech synthesis. @@ -303,11 +299,8 @@ class TTSService(AIService): stacklevel=2, ) logger.info(f"Switching TTS voice to: [{voice}]") - if isinstance(self._settings, ServiceSettings): - settings_cls = type(self._settings) - await self._update_settings_from_typed(settings_cls(voice=voice)) - else: - self._voice_id = voice + settings_cls = type(self._settings) + await self._update_settings(settings_cls(voice=voice)) def create_context_id(self) -> str: """Generate a unique context ID for a TTS request. @@ -439,25 +432,8 @@ class TTSService(AIService): if not (agg_type == aggregation_type and func == transform_function) ] - async def _update_settings(self, settings: Mapping[str, Any]): - for key, value in settings.items(): - if key in self._settings: - logger.info(f"Updating TTS setting {key} to: [{value}]") - self._settings[key] = value - if key == "language": - self._settings[key] = self.language_to_service_language(value) - elif key == "model": - self.set_model_name(value) - elif key == "voice" or key == "voice_id": - self._voice_id = value - elif key == "text_filter": - for filter in self._text_filters: - await filter.update_settings(value) - else: - logger.warning(f"Unknown setting for TTS service: {key}") - - async def _update_settings_from_typed(self, update: TTSSettings) -> set[str]: - """Apply a typed TTS settings update. + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a TTS settings update. Handles ``model`` (via parent) and syncs ``_voice_id`` when voice changes. 
Translates language values before applying. Does **not** @@ -466,7 +442,7 @@ class TTSService(AIService): returned changed-field set. Args: - update: A typed TTS settings delta. + update: A TTS settings delta. Returns: Set of field names whose values actually changed. @@ -477,10 +453,10 @@ class TTSService(AIService): if converted is not None: update.language = converted - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) # Keep _voice_id in sync for code that reads it directly - if "voice" in changed and isinstance(self._settings, TTSSettings): + if "voice" in changed: self._voice_id = self._settings.voice return changed @@ -566,16 +542,12 @@ class TTSService(AIService): await self.flush_audio() self._processing_text = processing_text elif isinstance(frame, TTSUpdateSettingsFrame): - # New path: typed settings update object. if frame.update is not None: - await self._update_settings_from_typed(frame.update) - # Legacy path: plain dict, but service uses typed settings — convert. - elif isinstance(self._settings, ServiceSettings): + await self._update_settings(frame.update) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. update = type(self._settings).from_mapping(frame.settings) - await self._update_settings_from_typed(update) - # Legacy path: plain dict, service still uses dict-based settings. 
- else: - await self._update_settings(frame.settings) + await self._update_settings(update) elif isinstance(frame, BotStoppedSpeakingFrame): await self._maybe_resume_frame_processing() await self.push_frame(frame, direction) diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 6f5e5d2ee..434a54ab6 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -325,8 +325,8 @@ class UltravoxRealtimeLLMService(LLMService): await self.cancel_task(self._receive_task, timeout=1.0) self._receive_task = None - async def _update_settings_from_typed(self, update: UltravoxRealtimeLLMSettings): - changed = await super()._update_settings_from_typed(update) + async def _update_settings(self, update: UltravoxRealtimeLLMSettings): + changed = await super()._update_settings(update) if "output_medium" in changed: await self._update_output_medium(self._settings.output_medium) return changed diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 6c35824a4..6ff85efeb 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -28,7 +28,7 @@ from pipecat.utils.tracing.service_decorators import traced_stt @dataclass class BaseWhisperSTTSettings(STTSettings): - """Typed settings for Whisper API-based STT services. + """Settings for Whisper API-based STT services. Parameters: base_url: API base URL. @@ -174,13 +174,13 @@ class BaseWhisperSTTService(SegmentedSTTService): def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) - async def _update_settings_from_typed(self, update: STTSettings) -> set[str]: - """Apply a typed settings update, syncing instance variables. + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, syncing instance variables. 
Keeps ``_language``, ``_prompt``, and ``_temperature`` in sync with - the typed settings fields. + the settings fields. """ - changed = await super()._update_settings_from_typed(update) + changed = await super()._update_settings(update) if "language" in changed: self._language = self.language_to_service_language(Language(self._settings.language)) diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index d5f4c3f1b..a96c26992 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -176,7 +176,7 @@ def language_to_whisper_language(language: Language) -> Optional[str]: @dataclass class WhisperSTTSettings(STTSettings): - """Typed settings for the local Whisper (Faster Whisper) STT service. + """Settings for the local Whisper (Faster Whisper) STT service. Parameters: device: Inference device ('cpu', 'cuda', or 'auto'). @@ -191,7 +191,7 @@ class WhisperSTTSettings(STTSettings): @dataclass class WhisperMLXSTTSettings(STTSettings): - """Typed settings for the MLX Whisper STT service. + """Settings for the MLX Whisper STT service. Parameters: no_speech_prob: Probability threshold for filtering non-speech segments. diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 4415f9f53..3ba332138 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -72,7 +72,7 @@ def language_to_xtts_language(language: Language) -> Optional[str]: @dataclass class XTTSTTSSettings(TTSSettings): - """Typed settings for XTTS TTS service. + """Settings for XTTS TTS service. Parameters: base_url: Base URL of the XTTS streaming server. From 66b7b4a5d4232ba6a38be38c6247648cc5db99d6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 13 Feb 2026 16:04:49 -0500 Subject: [PATCH 007/189] Update COMMUNITY_INTEGRATIONS.md for the new dataclass-based service settings pattern. 
--- COMMUNITY_INTEGRATIONS.md | 54 +++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index a26836a52..c169cb5ab 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -235,22 +235,54 @@ def can_generate_metrics(self) -> bool: ### Dynamic Settings Updates -STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass. +STT, LLM, and TTS services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`). + +Each service declares a settings dataclass that extends the appropriate base (`STTSettings`, `TTSSettings`, `LLMSettings`). Fields default to `NOT_GIVEN` so that update objects can represent sparse deltas: ```python -async def set_language(self, language: Language): - """Set the recognition language and reconnect. +from dataclasses import dataclass, field - Args: - language: The language to use for speech recognition. +from pipecat.services.settings import STTSettings, NOT_GIVEN + +@dataclass +class MySTTSettings(STTSettings): + """Settings for my STT service. + + Parameters: + region: Cloud region for the service. """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - await self._disconnect() - await self._connect() + + region: str = field(default_factory=lambda: NOT_GIVEN) ``` -Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection. 
+The service stores its current settings in `self._settings` and declares the type with a class-level annotation for editor support: + +```python +class MySTTService(STTService): + + _settings: MySTTSettings + + def __init__(self, *, model: str, region: str, **kwargs): + super().__init__(**kwargs) + self._settings = MySTTSettings(model=model, region=region) +``` + +To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns the set of field names that changed. Your override should call `super()` first, then act on the changed fields: + +```python +async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update, reconfiguring the recognizer if needed.""" + changed = await super()._update_settings(update) + + if "language" in changed: + # Restart the recognizer with the new language. + await self._disconnect() + await self._connect() + + return changed +``` + +Note that, in this example, the service requires a reconnect to apply the new language. Consider whether your service requires reconnection or can apply changes in-place. 
### Sample Rate Handling @@ -260,7 +292,7 @@ Sample rates are set via PipelineParams and passed to each frame processor at in async def start(self, frame: StartFrame): """Start the service.""" await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() ``` From 3a77b4c1d8af4090fe35328f45fea1c432302869 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 09:35:58 -0500 Subject: [PATCH 008/189] =?UTF-8?q?In=20services=20that=20don't=20handle?= =?UTF-8?q?=20runtime=20settings=20updates=E2=80=94or=20don't=20handle=20t?= =?UTF-8?q?hem=20for=20*all*=20available=20settings=E2=80=94log=20a=20warn?= =?UTF-8?q?ing=20about=20which=20fields=20specifically=20aren't=20handled.?= =?UTF-8?q?=20Revert=20new=20apply-settings-updates=20logic=20across=20var?= =?UTF-8?q?ious=20services,=20to=20reduce=20PR=20testing=20scope.=20This?= =?UTF-8?q?=20logic=20can=20be=20added=20service=20by=20service=20graduall?= =?UTF-8?q?y=20as=20future=20work.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that for services that previously handled applying updates (through methods like `set_model` and `set_language`), we're keeping the update-applying logic (some or most of which is already well-tested) and expanding it to cover all relevant settings fields. 
Services under this bucket are: - Deepgram STT - Deepgram Sagemaker STT - Elevenlabs STT - Google STT - Gradium STT - OpenAI STT - Speechmatics STT --- src/pipecat/services/ai_service.py | 14 +++++++++++ src/pipecat/services/assemblyai/stt.py | 24 ++++++++++--------- src/pipecat/services/aws/nova_sonic/llm.py | 24 +++++++++++++++++++ src/pipecat/services/aws/stt.py | 18 +++++++++----- src/pipecat/services/azure/stt.py | 21 +++++++++------- src/pipecat/services/cartesia/stt.py | 14 +++++++---- src/pipecat/services/cartesia/tts.py | 19 +++++++++++++++ src/pipecat/services/deepgram/flux/stt.py | 20 ++++++++++++++++ src/pipecat/services/elevenlabs/tts.py | 6 +++++ src/pipecat/services/gladia/stt.py | 16 +++++++------ .../services/google/gemini_live/llm.py | 19 +++++++++++++++ src/pipecat/services/gradium/tts.py | 2 ++ src/pipecat/services/inworld/tts.py | 19 +++++++++++++++ src/pipecat/services/playht/tts.py | 19 +++++++++++++++ src/pipecat/services/rime/tts.py | 2 ++ src/pipecat/services/sarvam/stt.py | 13 ++++++---- src/pipecat/services/sarvam/tts.py | 1 + src/pipecat/services/soniox/stt.py | 10 +++++--- src/pipecat/services/ultravox/llm.py | 1 + 19 files changed, 218 insertions(+), 44 deletions(-) diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index 2c6c10be4..64af3b4b2 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -123,6 +123,20 @@ class AIService(FrameProcessor): return changed + def _warn_unhandled_updated_settings(self, unhandled: Set[str]): + """Log a warning for settings changes that won't take effect at runtime. + + Convenience helper for ``_update_settings`` overrides. Call with the + set of field names that changed but that the service does not (yet) + apply at runtime. + + Args: + unhandled: Field names that changed but are not applied. 
+ """ + if unhandled: + fields = ", ".join(sorted(unhandled)) + logger.warning(f"{self.name}: runtime update of [{fields}] is not currently supported") + async def process_frame(self, frame: Frame, direction: FrameDirection): """Process frames and handle service lifecycle. diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 2e7b1230b..a291e6903 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -185,10 +185,9 @@ class AssemblyAISTTService(WebsocketSTTService): return True async def _update_settings(self, update: STTSettings) -> set[str]: - """Apply a settings update and reconnect if anything changed. + """Apply a settings update. - Any change triggers a WebSocket reconnect since all connection - parameters are encoded in the WebSocket URL. + Settings are stored but not applied to the active connection. Args: update: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. @@ -201,15 +200,18 @@ class AssemblyAISTTService(WebsocketSTTService): if not changed: return changed - # Re-apply manual turn mode config if vad_force_turn_endpoint is active - # and connection_params were updated. - if self._vad_force_turn_endpoint and "connection_params" in changed: - self._settings.connection_params = self._configure_manual_turn_mode( - self._settings.connection_params - ) + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # # Re-apply manual turn mode config if vad_force_turn_endpoint is active + # # and connection_params were updated. 
+ # if self._vad_force_turn_endpoint and "connection_params" in changed: + # self._settings.connection_params = self._configure_manual_turn_mode( + # self._settings.connection_params + # ) + # await self._disconnect() + # await self._connect() - await self._disconnect() - await self._connect() + self._warn_unhandled_updated_settings(changed) return changed diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 05baba2bd..92bd0ea1f 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -60,6 +60,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService +from pipecat.services.settings import LLMSettings from pipecat.utils.time import time_now_iso8601 try: @@ -302,6 +303,29 @@ class AWSNovaSonicLLMService(LLMService): with wave.open(file_path.open("rb"), "rb") as wav_file: self._assistant_response_trigger_audio = wav_file.readframes(wav_file.getnframes()) + # + # settings + # + + async def _update_settings(self, update: LLMSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + # # standard AIService frame handling # diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index 6a91c2973..dda58e2ba 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -141,16 +141,22 @@ class AWSTranscribeSTTService(WebsocketSTTService): return encoding_map.get(encoding, encoding) async def _update_settings(self, update: STTSettings) -> set[str]: - """Apply a settings update, reconnecting if needed. + """Apply a settings update. - Any change to connection-relevant settings (model, language, etc.) - triggers a WebSocket reconnect so the new configuration takes effect. + Settings are stored but not applied to the active connection. """ changed = await super()._update_settings(update) - if changed and self._websocket: - await self._disconnect() - await self._connect() + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # if changed and self._websocket: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) return changed diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 319296a47..a9ecd67e7 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -124,25 +124,28 @@ class AzureSTTService(STTService): return True async def _update_settings(self, update: STTSettings) -> set[str]: - """Apply a settings update, reconfiguring the recognizer if needed. + """Apply a settings update. - When ``language`` changes the ``SpeechConfig`` is updated and the - speech recognizer is restarted so that the new language takes effect. + Settings are stored but not applied to the active recognizer. 
""" changed = await super()._update_settings(update) if "language" in changed: - # Convert Language enum to Azure language code if needed. + # Convert Language enum to Azure language code for consistency. lang = self._settings.language if isinstance(lang, Language): lang = language_to_azure_language(lang) self._settings.language = lang - self._speech_config.speech_recognition_language = lang - # Restart the recognizer with the new config. - if self._speech_recognizer: - self._speech_recognizer.stop_continuous_recognition_async() - self._speech_recognizer.start_continuous_recognition_async() + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # if "language" in changed: + # self._speech_config.speech_recognition_language = self._settings.language + # if self._speech_recognizer: + # self._speech_recognizer.stop_continuous_recognition_async() + # self._speech_recognizer.start_continuous_recognition_async() + + self._warn_unhandled_updated_settings(changed) return changed diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 5116965ec..a069dface 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -295,7 +295,7 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() async def _update_settings(self, update: STTSettings) -> set[str]: - """Apply a settings update and reconnect if anything changed. + """Apply a settings update. Args: update: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. @@ -304,9 +304,15 @@ class CartesiaSTTService(WebsocketSTTService): Set of field names whose values actually changed. """ changed = await super()._update_settings(update) - if changed: - await self._disconnect() - await self._connect() + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # if changed: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + return changed async def _connect_websocket(self): diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index cff365443..00620fdb0 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -344,6 +344,25 @@ class CartesiaTTSService(AudioContextWordTTSService): """ return True + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Cartesia language format. diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index 547eca0de..14e77c2d3 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -27,6 +27,7 @@ from pipecat.frames.frames import ( UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) +from pipecat.services.settings import STTSettings from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 @@ -329,6 +330,25 @@ class DeepgramFluxSTTService(WebsocketSTTService): """ return True + async def _update_settings(self, update: STTSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. 
+ """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def start(self, frame: StartFrame): """Start the Deepgram Flux STT service. diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 9643fa6ba..79f05bbf8 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -516,6 +516,12 @@ class ElevenLabsTTSService(AudioContextWordTTSService): await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) self._context_id = None + if not url_changed: + # Reconnect applies all settings; only warn about fields not handled + # by voice settings or URL changes. + handled = ElevenLabsTTSSettings.URL_FIELDS | ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS + self._warn_unhandled_updated_settings(changed - handled) + return changed async def start(self, frame: StartFrame): diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index bb8f05e61..500b88052 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -382,8 +382,7 @@ class GladiaSTTService(WebsocketSTTService): async def _update_settings(self, update: GladiaSTTSettings) -> set[str]: """Apply settings update. - Gladia sessions are fixed at creation time, so any change requires - a full session teardown and reconnect. + Settings are stored but not applied to the active session. Args: update: A settings delta. 
@@ -396,11 +395,14 @@ class GladiaSTTService(WebsocketSTTService): if not changed: return changed - # Gladia sessions are fixed — need to tear down and recreate - self._session_url = None - self._session_id = None - await self._disconnect() - await self._connect() + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # self._session_url = None + # self._session_id = None + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) return changed diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 7a7aed08c..ecca52396 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -804,6 +804,25 @@ class GeminiLiveLLMService(LLMService): """ return True + async def _update_settings(self, update: LLMSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + def set_audio_input_paused(self, paused: bool): """Set the audio input pause state. 
diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index bc4945bcf..947404baa 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -133,6 +133,8 @@ class GradiumTTSService(InterruptibleWordTTSService): if self._voice_id != prev_voice: await self._disconnect() await self._connect() + else: + self._warn_unhandled_updated_settings(changed) return changed def _build_msg(self, text: str = "") -> dict: diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 2f6a13bd1..80d95aef9 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -165,6 +165,25 @@ class InworldHttpTTSService(WordTTSService): """ return True + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def start(self, frame: StartFrame): """Start the Inworld TTS service. diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index b63e5d648..ece3f2c17 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -215,6 +215,25 @@ class PlayHTTTSService(InterruptibleTTSService): """ return True + async def _update_settings(self, update: TTSSettings) -> set[str]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to PlayHT service language format. diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 5f8a5cef6..e87cf9f4f 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -279,6 +279,8 @@ class RimeTTSService(AudioContextWordTTSService): self._settings.speaker = self._voice_id await self._disconnect() await self._connect() + else: + self._warn_unhandled_updated_settings(changed) return changed def _build_msg(self, text: str = "") -> dict: diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 68427fbc4..271b12ffe 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -338,11 +338,16 @@ class SarvamSTTService(STTService): changed = await super()._update_settings(update) - if not changed: - return changed + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # if not changed: + # return changed + + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) - await self._disconnect() - await self._connect() return changed async def set_prompt(self, prompt: Optional[str]): diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 56ce3bef0..6842eda35 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -958,6 +958,7 @@ class SarvamTTSService(InterruptibleTTSService): changed = await super()._update_settings(update) if "voice" in changed: await self._send_config() + self._warn_unhandled_updated_settings(changed - {"voice"}) return changed async def _connect(self): diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index a74a14ee9..29ca33ad5 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -225,7 +225,7 @@ class SonioxSTTService(WebsocketSTTService): ``input_params`` is given, its ``model`` is propagated *up* to the top-level field. - Any change triggers a WebSocket reconnect. + Settings are stored but not applied to the active connection. Args: update: A settings delta. @@ -249,8 +249,12 @@ class SonioxSTTService(WebsocketSTTService): self._settings.model = self._settings.input_params.model self.set_model_name(self._settings.model) - await self._disconnect() - await self._connect() + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) return changed diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 434a54ab6..07dc107eb 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -329,6 +329,7 @@ class UltravoxRealtimeLLMService(LLMService): changed = await super()._update_settings(update) if "output_medium" in changed: await self._update_output_medium(self._settings.output_medium) + self._warn_unhandled_updated_settings(changed - {"output_medium"}) return changed # From fa6a6dabee584138a9744b190bb8d7321c5c751c Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 11:02:13 -0500 Subject: [PATCH 009/189] Fix `DeepgramSageMakerSTTService._update_settings` live_options sync to match `DeepgramSTTService` pattern. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing reverse sync (live_options → top-level model/language) and `set_model_name()` call. --- .../services/deepgram/stt_sagemaker.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index e503592d7..08acc6530 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -167,10 +167,13 @@ class DeepgramSageMakerSTTService(STTService): """Apply a settings update, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When - they change their values are propagated into ``live_options``. + they are given in *update* their values are propagated into + ``live_options``. When only ``live_options`` is given, its ``model`` + and ``language`` are propagated *up* to the top-level fields. Any change triggers a reconnect. 
""" + # Determine which top-level fields are explicitly provided. model_given = isinstance(update, DeepgramSageMakerSTTSettings) and is_given( getattr(update, "model", NOT_GIVEN) ) @@ -183,16 +186,23 @@ class DeepgramSageMakerSTTService(STTService): if not changed: return changed - # Sync model into live_options - if model_given and "model" in changed: + # --- Sync model -------------------------------------------------- + if model_given: + # Top-level model wins → push into live_options. self._settings.live_options.model = self._settings.model + elif "live_options" in changed and self._settings.live_options.model is not None: + # Only live_options was given → pull model up. + self._settings.model = self._settings.live_options.model + self.set_model_name(self._settings.model) - # Sync language into live_options - if language_given and "language" in changed: + # --- Sync language ----------------------------------------------- + if language_given: lang = self._settings.language if isinstance(lang, Language): lang = lang.value self._settings.live_options.language = lang + elif "live_options" in changed and self._settings.live_options.language is not None: + self._settings.language = self._settings.live_options.language await self._disconnect() await self._connect() From 02c2778b8d77a5b9f5877db05395a2159cf13f4a Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 11:07:27 -0500 Subject: [PATCH 010/189] Document `_warn_unhandled_updated_settings` pattern in COMMUNITY_INTEGRATIONS.md. --- COMMUNITY_INTEGRATIONS.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index c169cb5ab..a7cfa6103 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -282,7 +282,22 @@ async def _update_settings(self, update: STTSettings) -> set[str]: return changed ``` -Note that, in this example, the service requires a reconnect to apply the new language. 
Consider whether your service requires reconnection or can apply changes in-place. +Note that, in this example, the service requires a reconnect to apply the new language. Consider, for each setting, whether your service requires reconnection or can apply changes in-place. + +If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with the set of unhandled field names so users get a clear log message: + +```python +async def _update_settings(self, update: STTSettings) -> set[str]: + changed = await super()._update_settings(update) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + self._warn_unhandled_updated_settings(changed) + + return changed +``` ### Sample Rate Handling From 3b1ba57452111c69afbbab023fb59c5bd0cb9594 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 11:49:15 -0500 Subject: [PATCH 011/189] Change `apply_update` / `_update_settings` return type from `set[str]` to `dict[str, Any]`. The dict maps each changed field name to its pre-update value, enabling services to do granular diffing of complex settings objects. Existing call-site patterns (`"field" in changed`, `if changed`, iteration) work unchanged; set-difference sites use `changed.keys() - {...}`. 
--- COMMUNITY_INTEGRATIONS.md | 10 +++++--- src/pipecat/services/ai_service.py | 24 +++++++++--------- src/pipecat/services/assemblyai/stt.py | 4 +-- src/pipecat/services/aws/nova_sonic/llm.py | 2 +- src/pipecat/services/aws/stt.py | 4 +-- src/pipecat/services/azure/stt.py | 4 +-- src/pipecat/services/cartesia/stt.py | 6 ++--- src/pipecat/services/cartesia/tts.py | 2 +- src/pipecat/services/deepgram/flux/stt.py | 2 +- src/pipecat/services/deepgram/stt.py | 4 +-- .../services/deepgram/stt_sagemaker.py | 4 +-- src/pipecat/services/elevenlabs/stt.py | 10 ++++---- src/pipecat/services/elevenlabs/tts.py | 10 ++++---- src/pipecat/services/fal/stt.py | 4 +-- src/pipecat/services/fish/tts.py | 6 ++--- src/pipecat/services/gladia/stt.py | 4 +-- .../services/google/gemini_live/llm.py | 2 +- src/pipecat/services/google/stt.py | 4 +-- src/pipecat/services/google/tts.py | 10 ++++---- src/pipecat/services/gradium/stt.py | 6 ++--- src/pipecat/services/gradium/tts.py | 6 ++--- src/pipecat/services/inworld/tts.py | 2 +- src/pipecat/services/llm_service.py | 4 +-- src/pipecat/services/neuphonic/tts.py | 4 +-- src/pipecat/services/nvidia/stt.py | 6 ++--- src/pipecat/services/openai/stt.py | 6 ++--- src/pipecat/services/playht/tts.py | 4 +-- src/pipecat/services/rime/tts.py | 6 ++--- src/pipecat/services/sarvam/stt.py | 6 ++--- src/pipecat/services/sarvam/tts.py | 6 ++--- src/pipecat/services/settings.py | 20 ++++++++------- src/pipecat/services/soniox/stt.py | 4 +-- src/pipecat/services/speechmatics/stt.py | 6 ++--- src/pipecat/services/stt_service.py | 6 ++--- src/pipecat/services/tts_service.py | 6 ++--- src/pipecat/services/ultravox/llm.py | 2 +- src/pipecat/services/whisper/base_stt.py | 4 +-- tests/test_settings.py | 25 +++++++++++++------ 38 files changed, 129 insertions(+), 116 deletions(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index a7cfa6103..f52e30b58 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -267,10 +267,10 @@ 
class MySTTService(STTService): self._settings = MySTTSettings(model=model, region=region) ``` -To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns the set of field names that changed. Your override should call `super()` first, then act on the changed fields: +To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields: ```python -async def _update_settings(self, update: STTSettings) -> set[str]: +async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, reconfiguring the recognizer if needed.""" changed = await super()._update_settings(update) @@ -282,12 +282,14 @@ async def _update_settings(self, update: STTSettings) -> set[str]: return changed ``` +The returned dict behaves like a set of field names for membership tests (`"language" in changed`), truthiness (`if changed`), and iteration. Use `changed.keys() - {"language"}` for set difference, or `changed["language"]` to inspect the previous value of a field. + Note that, in this example, the service requires a reconnect to apply the new language. Consider, for each setting, whether your service requires reconnection or can apply changes in-place. 
-If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with the set of unhandled field names so users get a clear log message: +If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with the unhandled field names so users get a clear log message: ```python -async def _update_settings(self, update: STTSettings) -> set[str]: +async def _update_settings(self, update: STTSettings) -> dict[str, Any]: changed = await super()._update_settings(update) if not changed: diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index 64af3b4b2..ec78549c2 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -10,7 +10,7 @@ Provides the foundation for all AI services in the Pipecat framework, including model management, settings handling, and frame processing lifecycle methods. """ -from typing import Any, AsyncGenerator, Dict, Set +from typing import Any, AsyncGenerator, Dict from loguru import logger @@ -97,12 +97,13 @@ class AIService(FrameProcessor): """ pass - async def _update_settings(self, update: ServiceSettings) -> Set[str]: - """Apply a settings update and return the set of changed field names. + async def _update_settings(self, update: ServiceSettings) -> Dict[str, Any]: + """Apply a settings update and return the changed fields. - The update is applied to ``_settings`` and the changed-field set is - returned. The ``model`` field is handled specially: when it changes, - ``set_model_name`` is called. + The update is applied to ``_settings`` and a dict mapping each changed + field name to its **pre-update** value is returned. The ``model`` + field is handled specially: when it changes, ``set_model_name`` is + called. Concrete services should override this method (calling ``super()``) to react to specific changed fields (e.g. reconnect on voice change). 
@@ -111,7 +112,7 @@ class AIService(FrameProcessor): update: A settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = self._settings.apply_update(update) @@ -119,16 +120,15 @@ class AIService(FrameProcessor): self.set_model_name(self._settings.model) if changed: - logger.info(f"{self.name}: updated settings fields: {changed}") + logger.info(f"{self.name}: updated settings fields: {set(changed)}") return changed - def _warn_unhandled_updated_settings(self, unhandled: Set[str]): + def _warn_unhandled_updated_settings(self, unhandled): """Log a warning for settings changes that won't take effect at runtime. - Convenience helper for ``_update_settings`` overrides. Call with the - set of field names that changed but that the service does not (yet) - apply at runtime. + Convenience helper for ``_update_settings`` overrides. Accepts any + iterable of field names (a ``dict``, ``set``, ``dict_keys``, etc.). Args: unhandled: Field names that changed but are not applied. diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index a291e6903..23b7d149b 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -184,7 +184,7 @@ class AssemblyAISTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. @@ -193,7 +193,7 @@ class AssemblyAISTTService(WebsocketSTTService): update: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 92bd0ea1f..91c2374e3 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -307,7 +307,7 @@ class AWSNovaSonicLLMService(LLMService): # settings # - async def _update_settings(self, update: LLMSettings) -> set[str]: + async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index dda58e2ba..ae502e8be 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -15,7 +15,7 @@ import os import random import string from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -140,7 +140,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): } return encoding_map.get(encoding, encoding) - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index a9ecd67e7..8a5b09e26 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -12,7 +12,7 @@ Speech SDK for real-time audio transcription. 
import asyncio from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -123,7 +123,7 @@ class AzureSTTService(STTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active recognizer. diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index a069dface..6629d05bb 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -13,7 +13,7 @@ the Cartesia Live transcription API for real-time speech recognition. import json import urllib.parse from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -294,14 +294,14 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Args: update: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 00620fdb0..2544d3b98 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -344,7 +344,7 @@ class CartesiaTTSService(AudioContextWordTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update. 
Settings are stored but not applied to the active connection. diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index 14e77c2d3..dcb5e3429 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -330,7 +330,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 32759069b..f52932b2c 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -7,7 +7,7 @@ """Deepgram speech-to-text service implementation.""" from dataclasses import dataclass, field -from typing import AsyncGenerator, Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional from loguru import logger @@ -195,7 +195,7 @@ class DeepgramSTTService(STTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 08acc6530..870ded11f 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -15,7 +15,7 @@ languages, and various Deepgram features. 
import asyncio import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -163,7 +163,7 @@ class DeepgramSageMakerSTTService(STTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index d2b7a8f99..fd938c12e 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -16,7 +16,7 @@ import io import json from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -294,7 +294,7 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. Converts language to ElevenLabs format before applying and keeps @@ -304,7 +304,7 @@ class ElevenLabsSTTService(SegmentedSTTService): update: A :class:`STTSettings` (or ``ElevenLabsSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" # Convert language to ElevenLabs format before applying if is_given(update.language) and isinstance(update.language, Language): @@ -543,7 +543,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update and reconnect if anything changed. Converts language to ElevenLabs format before applying and keeps @@ -553,7 +553,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): update: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ # Convert language to ElevenLabs format before applying if is_given(update.language) and isinstance(update.language, Language): diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 79f05bbf8..022b08b94 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -471,7 +471,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): voice_settings[key] = val return voice_settings or None - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update, reconnecting as needed. Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS`` @@ -482,7 +482,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): update: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) @@ -520,7 +520,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): # Reconnect applies all settings; only warn about fields not handled # by voice settings or URL changes. handled = ElevenLabsTTSSettings.URL_FIELDS | ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS - self._warn_unhandled_updated_settings(changed - handled) + self._warn_unhandled_updated_settings(changed.keys() - handled) return changed @@ -964,14 +964,14 @@ class ElevenLabsHttpTTSService(WordTTSService): def _set_voice_settings(self): return build_elevenlabs_voice_settings(self._settings) - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and rebuild voice settings. Args: update: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) if changed: diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index ff6628f6c..28b611865 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -12,7 +12,7 @@ transcription using segmented audio processing. 
import os from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -251,7 +251,7 @@ class FalSTTService(SegmentedSTTService): """ return language_to_fal_language(language) - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, converting language if changed.""" changed = await super()._update_settings(update) diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index daa884af8..4da4b6673 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -12,7 +12,7 @@ for streaming text-to-speech synthesis with customizable voice parameters. import uuid from dataclasses import dataclass, field -from typing import AsyncGenerator, Literal, Optional +from typing import Any, AsyncGenerator, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -184,7 +184,7 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect if needed. Any change to voice or model triggers a WebSocket reconnect. @@ -193,7 +193,7 @@ class FishAudioTTSService(InterruptibleTTSService): update: A :class:`TTSSettings` (or ``FishAudioTTSSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) if changed: diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 500b88052..25922e7aa 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -379,7 +379,7 @@ class GladiaSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: GladiaSTTSettings) -> set[str]: + async def _update_settings(self, update: GladiaSTTSettings) -> dict[str, Any]: """Apply settings update. Settings are stored but not applied to the active session. @@ -388,7 +388,7 @@ class GladiaSTTService(WebsocketSTTService): update: A settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index ecca52396..3047e258d 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -804,7 +804,7 @@ class GeminiLiveLLMService(LLMService): """ return True - async def _update_settings(self, update: LLMSettings) -> set[str]: + async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. 
diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index d4ffb0d91..cdd583c8e 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -630,7 +630,7 @@ class GoogleSTTService(STTService): logger.debug(f"Switching STT languages to: {languages}") await self._update_settings(GoogleSTTSettings(languages=list(languages))) - async def _update_settings(self, update: GoogleSTTSettings) -> set[str]: + async def _update_settings(self, update: GoogleSTTSettings) -> dict[str, Any]: """Apply settings update and reconnect if anything changed. Handles ``language`` from base ``set_language`` by converting it to @@ -642,7 +642,7 @@ class GoogleSTTService(STTService): update: A settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ from pipecat.services.settings import is_given diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 9769aa665..e47aa384a 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -24,7 +24,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" from dataclasses import dataclass, field -from typing import AsyncGenerator, List, Literal, Optional +from typing import Any, AsyncGenerator, List, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -680,7 +680,7 @@ class GoogleHttpTTSService(TTSService): """ return language_to_google_tts_language(language) - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Override to handle speaking_rate validation. 
Args: @@ -1024,7 +1024,7 @@ class GoogleTTSService(GoogleBaseTTSService): credentials, credentials_path ) - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Override to handle speaking_rate validation. Args: @@ -1259,14 +1259,14 @@ class GeminiTTSService(GoogleBaseTTSService): f"Current rate of {self.sample_rate}Hz may cause issues." ) - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update with voice validation. Args: update: Settings delta. Can include 'voice', 'prompt', etc. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ if is_given(update.voice) and update.voice not in self.AVAILABLE_VOICES: logger.warning(f"Voice '{update.voice}' not in known voices list. Using anyway.") diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 3b634cbd2..381f76884 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -13,7 +13,7 @@ WebSocket API for streaming audio transcription. import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -171,14 +171,14 @@ class GradiumSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, sync params, and reconnect. Args: update: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) if not changed: diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 947404baa..3bffbb5bf 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -7,7 +7,7 @@ import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -119,14 +119,14 @@ class GradiumTTSService(InterruptibleWordTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect if voice changed. Args: update: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ prev_voice = self._voice_id changed = await super()._update_settings(update) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 80d95aef9..c291f3156 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -165,7 +165,7 @@ class InworldHttpTTSService(WordTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. 
diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 97d49192c..860a472c9 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -313,14 +313,14 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._cancel_sequential_runner_task() await self._cancel_summary_task() - async def _update_settings(self, update: LLMSettings) -> set[str]: + async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: """Apply a settings update, handling turn-completion fields. Args: update: An LLM settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 0680de4f6..0797f9b1b 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -14,7 +14,7 @@ import asyncio import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -181,7 +181,7 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return language_to_neuphonic_lang_code(language) - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect with new configuration.""" changed = await super()._update_settings(update) if changed: diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index ff76a6900..b0d11fc2b 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -9,7 +9,7 @@ import asyncio from concurrent.futures import CancelledError as FuturesCancelledError from dataclasses import dataclass, field -from typing import 
AsyncGenerator, List, Mapping, Optional +from typing import Any, AsyncGenerator, List, Mapping, Optional from loguru import logger from pydantic import BaseModel @@ -579,14 +579,14 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = self._create_recognition_config() logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self.model_name}") - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update and sync internal state. Args: update: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 458f40133..6daefd1da 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -17,7 +17,7 @@ Provides two STT services: import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Literal, Optional, Union +from typing import Any, AsyncGenerator, Literal, Optional, Union from loguru import logger @@ -268,7 +268,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update and send session update if needed. Keeps ``_language_code`` and ``_prompt`` in sync with settings @@ -278,7 +278,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): update: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index ece3f2c17..1965c9ea3 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -15,7 +15,7 @@ import json import struct import warnings from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -215,7 +215,7 @@ class PlayHTTTSService(InterruptibleTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index e87cf9f4f..9e916025c 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -13,7 +13,7 @@ using Rime's API for streaming and batch audio synthesis. 
import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -271,7 +271,7 @@ class RimeTTSService(AudioContextWordTTSService): self._extra_msg_fields["inlineSpeedAlpha"] = ",".join(speed_vals + [str(speed)]) return f"[{text}]" - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect if voice changed.""" prev_voice = self._voice_id changed = await super()._update_settings(update) @@ -977,7 +977,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect if necessary. Since all settings are WebSocket URL query parameters, diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 271b12ffe..80e6d6ca2 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -13,7 +13,7 @@ can handle multiple audio formats for Indian language speech recognition. import base64 from dataclasses import dataclass, field -from typing import AsyncGenerator, Dict, Literal, Optional +from typing import Any, AsyncGenerator, Dict, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -306,14 +306,14 @@ class SarvamSTTService(STTService): if self._socket_client: await self._socket_client.flush() - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, validate, sync state, and reconnect. Args: update: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. 
Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. Raises: ValueError: If a setting is not supported by the current model. diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 6842eda35..99a0827f5 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -42,7 +42,7 @@ import base64 import json from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, Dict, List, Optional, Tuple +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple import aiohttp from loguru import logger @@ -953,12 +953,12 @@ class SarvamTTSService(InterruptibleTTSService): if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await self.flush_audio() - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and resend config if voice changed.""" changed = await super()._update_settings(update) if "voice" in changed: await self._send_config() - self._warn_unhandled_updated_settings(changed - {"voice"}) + self._warn_unhandled_updated_settings(changed.keys() - {"voice"}) return changed async def _connect(self): diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index d63e1f539..eae120842 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -19,7 +19,7 @@ Key concepts: service's current settings *and* for update objects. Fields set to ``NOT_GIVEN`` are simply skipped when applying an update. - **apply_update**: Applies a delta onto a target settings object and returns - the set of field names that actually changed. + a dict mapping each changed field name to its previous value. - **from_mapping**: Constructs a settings object from a plain dict, supporting field aliases (e.g. ``"voice_id"`` → ``"voice"``). 
- **Extras**: Unknown keys land in the ``extra`` dict so services that have @@ -30,7 +30,7 @@ from __future__ import annotations import copy from dataclasses import dataclass, field, fields -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Set, Type, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Type, TypeVar from loguru import logger @@ -140,8 +140,8 @@ class ServiceSettings: result.update(self.extra) return result - def apply_update(self: _S, update: _S) -> Set[str]: - """Apply *update* onto this settings object, returning changed field names. + def apply_update(self: _S, update: _S) -> Dict[str, Any]: + """Apply *update* onto this settings object, returning changed fields. Only fields in *update* that are **given** (i.e. not ``NOT_GIVEN``) are considered. A field is "changed" if its new value differs from @@ -154,17 +154,19 @@ class ServiceSettings: update: A settings object of the same type containing the delta. Returns: - The set of field names whose values actually changed. + A dict mapping each changed field name to its **pre-update** value. + Use ``changed.keys()`` for the set of names, or index with + ``changed["field"]`` to inspect the old value. 
Examples:: current = TTSSettings(voice="alice", language="en") delta = TTSSettings(voice="bob") changed = current.apply_update(delta) - # changed == {"voice"} + # changed == {"voice": "alice"} # current.voice == "bob", current.language == "en" """ - changed: Set[str] = set() + changed: Dict[str, Any] = {} for f in fields(self): if f.name == "extra": continue @@ -174,14 +176,14 @@ class ServiceSettings: old_val = getattr(self, f.name) if old_val != new_val: setattr(self, f.name, new_val) - changed.add(f.name) + changed[f.name] = old_val # Merge extra for key, new_val in update.extra.items(): old_val = self.extra.get(key, NOT_GIVEN) if old_val != new_val: self.extra[key] = new_val - changed.add(key) + changed[key] = old_val return changed diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 29ca33ad5..5c4b49cbe 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -217,7 +217,7 @@ class SonioxSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: SonioxSTTSettings) -> set[str]: + async def _update_settings(self, update: SonioxSTTSettings) -> dict[str, Any]: """Apply a settings update, keeping ``input_params`` in sync. Top-level ``model`` is the source of truth. When it is given in @@ -231,7 +231,7 @@ class SonioxSTTService(WebsocketSTTService): update: A settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" model_given = is_given(getattr(update, "model", NOT_GIVEN)) diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 5bacc208a..c6fe0d16e 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -480,7 +480,7 @@ class SpeechmaticsSTTService(STTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: SpeechmaticsSTTSettings) -> set[str]: + async def _update_settings(self, update: SpeechmaticsSTTSettings) -> dict[str, Any]: """Apply settings update, reconnecting only when necessary. Fields are classified into three categories (see @@ -497,7 +497,7 @@ class SpeechmaticsSTTService(STTService): update: A settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) @@ -505,7 +505,7 @@ class SpeechmaticsSTTService(STTService): return changed no_reconnect = SpeechmaticsSTTSettings.HOT_FIELDS | SpeechmaticsSTTSettings.LOCAL_FIELDS - needs_reconnect = bool(changed - no_reconnect) + needs_reconnect = bool(changed.keys() - no_reconnect) if needs_reconnect: # Connection-level fields changed — rebuild the SDK config diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index e92095297..d6dd31824 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -236,19 +236,19 @@ class STTService(AIService): await super().cleanup() await self._cancel_ttfb_timeout() - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply an STT settings update. Handles ``model`` (via parent). Does **not** call ``set_language`` — concrete services should override this method and handle language changes (including any reconnect logic) based on the returned - changed-field set. 
+ changed-field dict. Args: update: An STT settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. """ changed = await super()._update_settings(update) return changed diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index a696e538d..bb5cff69f 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -432,20 +432,20 @@ class TTSService(AIService): if not (agg_type == aggregation_type and func == transform_function) ] - async def _update_settings(self, update: TTSSettings) -> set[str]: + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a TTS settings update. Handles ``model`` (via parent) and syncs ``_voice_id`` when voice changes. Translates language values before applying. Does **not** call ``set_voice`` or ``set_model`` directly — concrete services should override this method and handle reconnect logic based on the - returned changed-field set. + returned changed-field dict. Args: update: A TTS settings delta. Returns: - Set of field names whose values actually changed. + Dict mapping changed field names to their previous values. 
""" # Translate language *before* applying so the stored value is canonical if is_given(update.language) and update.language is not None: diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 07dc107eb..ef8baacb4 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -329,7 +329,7 @@ class UltravoxRealtimeLLMService(LLMService): changed = await super()._update_settings(update) if "output_medium" in changed: await self._update_output_medium(self._settings.output_medium) - self._warn_unhandled_updated_settings(changed - {"output_medium"}) + self._warn_unhandled_updated_settings(changed.keys() - {"output_medium"}) return changed # diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 6ff85efeb..a67ad1cbc 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -11,7 +11,7 @@ interface, including language mapping, metrics generation, and error handling. """ from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from openai import AsyncOpenAI @@ -174,7 +174,7 @@ class BaseWhisperSTTService(SegmentedSTTService): def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) - async def _update_settings(self, update: STTSettings) -> set[str]: + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, syncing instance variables. 
Keeps ``_language``, ``_prompt``, and ``_temperature`` in sync with diff --git a/tests/test_settings.py b/tests/test_settings.py index 62583b00b..71d66fc35 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -100,7 +100,8 @@ class TestApplyUpdate: current = TTSSettings(voice="alice", language="en") delta = TTSSettings(voice="bob") changed = current.apply_update(delta) - assert changed == {"voice"} + assert changed.keys() == {"voice"} + assert changed["voice"] == "alice" # old value assert current.voice == "bob" assert current.language == "en" @@ -108,14 +109,14 @@ class TestApplyUpdate: current = TTSSettings(voice="alice", language="en") delta = TTSSettings(voice="alice") changed = current.apply_update(delta) - assert changed == set() + assert changed == {} assert current.voice == "alice" def test_apply_update_not_given_skipped(self): current = TTSSettings(voice="alice", language="en") delta = TTSSettings() # all NOT_GIVEN changed = current.apply_update(delta) - assert changed == set() + assert changed == {} assert current.voice == "alice" assert current.language == "en" @@ -123,7 +124,9 @@ class TestApplyUpdate: current = LLMSettings(temperature=0.7, max_tokens=100) delta = LLMSettings(temperature=0.9, max_tokens=200, top_p=0.95) changed = current.apply_update(delta) - assert changed == {"temperature", "max_tokens", "top_p"} + assert changed.keys() == {"temperature", "max_tokens", "top_p"} + assert changed["temperature"] == 0.7 + assert changed["max_tokens"] == 100 assert current.temperature == 0.9 assert current.max_tokens == 200 assert current.top_p == 0.95 @@ -135,6 +138,7 @@ class TestApplyUpdate: delta.extra = {"speed": 1.2} changed = current.apply_update(delta) assert "speed" in changed + assert changed["speed"] == 1.0 # old value assert current.extra == {"speed": 1.2, "stability": 0.5} def test_apply_update_extra_no_change(self): @@ -143,13 +147,14 @@ class TestApplyUpdate: delta = TTSSettings() delta.extra = {"speed": 1.0} changed = 
current.apply_update(delta) - assert changed == set() + assert changed == {} def test_apply_update_model_field(self): current = ServiceSettings(model="old-model") delta = ServiceSettings(model="new-model") changed = current.apply_update(delta) - assert changed == {"model"} + assert changed.keys() == {"model"} + assert changed["model"] == "old-model" assert current.model == "new-model" def test_apply_update_none_is_a_valid_value(self): @@ -165,6 +170,7 @@ class TestApplyUpdate: delta = TTSSettings(language="en") changed = current.apply_update(delta) assert "language" in changed + assert changed["language"] is None # old value was None assert current.language == "en" @@ -293,7 +299,9 @@ class TestRoundtrip: delta = TTSSettings.from_mapping(raw) changed = current.apply_update(delta) - assert changed == {"voice", "speed"} + assert changed.keys() == {"voice", "speed"} + assert changed["voice"] == "alice" + assert changed["speed"] == 1.0 assert current.voice == "bob" assert current.language == "en" assert current.extra["speed"] == 1.2 @@ -303,6 +311,7 @@ class TestRoundtrip: current = LLMSettings(model="gpt-4o", temperature=0.7) delta = LLMSettings.from_mapping({"model": "gpt-4o-mini", "temperature": 0.9}) changed = current.apply_update(delta) - assert changed == {"model", "temperature"} + assert changed.keys() == {"model", "temperature"} + assert changed["model"] == "gpt-4o" assert current.model == "gpt-4o-mini" assert current.temperature == 0.9 From d2372c127add23aa9f19832fe65401fea7328e17 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 11:56:37 -0500 Subject: [PATCH 012/189] Add specific type annotations to `ServiceSettings` fields, replacing `Any` with `str`, `float`, `int` unions as appropriate. 
--- src/pipecat/services/settings.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index eae120842..54a25124b 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -104,7 +104,7 @@ class ServiceSettings: # -- common fields ------------------------------------------------------- - model: Any = field(default_factory=lambda: NOT_GIVEN) + model: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) """AI model identifier (e.g. ``"gpt-4o"``, ``"eleven_turbo_v2_5"``).""" extra: Dict[str, Any] = field(default_factory=dict) @@ -274,13 +274,13 @@ class LLMSettings(ServiceSettings): and prompts for incomplete turns. """ - temperature: Any = field(default_factory=lambda: NOT_GIVEN) - max_tokens: Any = field(default_factory=lambda: NOT_GIVEN) - top_p: Any = field(default_factory=lambda: NOT_GIVEN) - top_k: Any = field(default_factory=lambda: NOT_GIVEN) - frequency_penalty: Any = field(default_factory=lambda: NOT_GIVEN) - presence_penalty: Any = field(default_factory=lambda: NOT_GIVEN) - seed: Any = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_tokens: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_k: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + frequency_penalty: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + presence_penalty: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + seed: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) filter_incomplete_user_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) user_turn_completion_config: UserTurnCompletionConfig | _NotGiven = field( default_factory=lambda: NOT_GIVEN @@ -297,7 +297,7 @@ class TTSSettings(ServiceSettings): language: Language for speech 
synthesis. """ - voice: Any = field(default_factory=lambda: NOT_GIVEN) + voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) language: Any = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} From 7dc16b1d9210c9be2c464d47182b6840766de21a Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 14:49:26 -0500 Subject: [PATCH 013/189] Type `language` fields and centralize conversion in STT services. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change `TTSSettings.language` and `STTSettings.language` from `Any` to `Language | str | _NotGiven`. Add `language_to_service_language` base method and centralized `isinstance`-guarded conversion in `STTService._update_settings` (mirroring TTS). Update the TTS guard from `is not None` to `isinstance(…, Language)` so raw strings pass through unchanged. Remove now-redundant per-service language conversion from `_update_settings` overrides (ElevenLabs, Azure, Fal, Whisper). Add `language_to_service_language` to Azure STT so the centralized conversion picks it up. Fix AWS and NVIDIA STT `__init__` to convert language at construction time, then simplify their runtime accessors to read `_settings.language` directly. 
--- src/pipecat/services/aws/stt.py | 6 +++--- src/pipecat/services/azure/stt.py | 18 +++++++++++------- src/pipecat/services/elevenlabs/stt.py | 14 +------------- src/pipecat/services/fal/stt.py | 9 +-------- src/pipecat/services/nvidia/stt.py | 7 ++++--- src/pipecat/services/settings.py | 14 ++++++++++---- src/pipecat/services/stt_service.py | 24 +++++++++++++++++++++--- src/pipecat/services/tts_service.py | 2 +- src/pipecat/services/whisper/base_stt.py | 2 +- src/pipecat/services/whisper/stt.py | 6 ++---- 10 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index ae502e8be..21220e646 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -102,7 +102,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) self._settings = AWSTranscribeSTTSettings( - language=language, + language=self.language_to_service_language(language) or "en-US", sample_rate=sample_rate, media_encoding="linear16", number_of_channels=1, @@ -251,9 +251,9 @@ class AWSTranscribeSTTService(WebsocketSTTService): logger.debug("Connecting to AWS Transcribe WebSocket") - language_code = self.language_to_service_language(Language(self._settings.language)) + language_code = self._settings.language if not language_code: - raise ValueError(f"Unsupported language: {self._settings.language}") + raise ValueError(f"Unsupported language: {language_code}") # Generate random websocket key websocket_key = "".join( diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 8a5b09e26..7f9d3f1ba 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -123,6 +123,17 @@ class AzureSTTService(STTService): """ return True + def language_to_service_language(self, language: Language) -> Optional[str]: + """Convert a Language enum to Azure service-specific language code. 
+ + Args: + language: The language to convert. + + Returns: + The Azure-specific language identifier, or None if not supported. + """ + return language_to_azure_language(language) + async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update. @@ -130,13 +141,6 @@ class AzureSTTService(STTService): """ changed = await super()._update_settings(update) - if "language" in changed: - # Convert Language enum to Azure language code for consistency. - lang = self._settings.language - if isinstance(lang, Language): - lang = language_to_azure_language(lang) - self._settings.language = lang - # TODO: someday we could reconnect here to apply updated settings. # Code might look something like the below: # if "language" in changed: diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index fd938c12e..0ef137006 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -34,7 +34,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings from pipecat.services.stt_latency import ELEVENLABS_REALTIME_TTFS_P99, ELEVENLABS_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -306,12 +306,6 @@ class ElevenLabsSTTService(SegmentedSTTService): Returns: Dict mapping changed field names to their previous values. 
""" - # Convert language to ElevenLabs format before applying - if is_given(update.language) and isinstance(update.language, Language): - converted = self.language_to_service_language(update.language) - if converted is not None: - update.language = converted - changed = await super()._update_settings(update) if "model" in changed: @@ -555,12 +549,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Returns: Dict mapping changed field names to their previous values. """ - # Convert language to ElevenLabs format before applying - if is_given(update.language) and isinstance(update.language, Language): - converted = language_to_elevenlabs_language(update.language) - if converted is not None: - update.language = converted - changed = await super()._update_settings(update) if not changed: diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 28b611865..a29d8d70d 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -252,15 +252,8 @@ class FalSTTService(SegmentedSTTService): return language_to_fal_language(language) async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, converting language if changed.""" + """Apply a settings update.""" changed = await super()._update_settings(update) - - if "language" in changed: - # Convert the Language enum to a Fal language code. 
- lang = self._settings.language - if isinstance(lang, Language): - self._settings.language = self.language_to_service_language(lang) - return changed @traced_stt diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index b0d11fc2b..8e1babec7 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -488,7 +488,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = None self._asr_service = None self._settings = NvidiaSegmentedSTTSettings( - language=params.language or Language.EN_US, + language=self.language_to_service_language(params.language or Language.EN_US) + or "en-US", profanity_filter=params.profanity_filter, automatic_punctuation=params.automatic_punctuation, verbatim_transcripts=params.verbatim_transcripts, @@ -523,8 +524,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._asr_service = riva.client.ASRService(auth) def _get_language_code(self) -> str: - """Resolve the current language enum to an NVIDIA Riva language code string.""" - return self.language_to_service_language(self._settings.language) or "en-US" + """Get the current NVIDIA Riva language code string.""" + return self._settings.language or "en-US" def _create_recognition_config(self): """Create the NVIDIA Riva ASR recognition configuration.""" diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 54a25124b..fdc1c15e6 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -34,6 +34,8 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Type, from loguru import logger +from pipecat.transcriptions.language import Language + if TYPE_CHECKING: from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig @@ -294,11 +296,13 @@ class TTSSettings(ServiceSettings): Parameters: model: TTS model identifier. voice: Voice identifier or name. - language: Language for speech synthesis. 
+ language: Language for speech synthesis. Accepts a ``Language`` enum + (converted to a service-specific string) or a raw string (stored + as-is). """ voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - language: Any = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} @@ -309,7 +313,9 @@ class STTSettings(ServiceSettings): Parameters: model: STT model identifier. - language: Language for speech recognition. + language: Language for speech recognition. Accepts a ``Language`` enum + (converted to a service-specific string) or a raw string (stored + as-is). """ - language: Any = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index d6dd31824..ae04ed33f 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -35,7 +35,7 @@ from pipecat.frames.frames import ( from pipecat.metrics.metrics import TTFBMetricsData from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import STTSettings +from pipecat.services.settings import STTSettings, is_given from pipecat.services.stt_latency import DEFAULT_TTFS_P99 from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language @@ -206,6 +206,17 @@ class STTService(AIService): settings_cls = type(self._settings) await self._update_settings(settings_cls(language=language)) + def language_to_service_language(self, language: Language) -> Optional[str]: + """Convert a language to the service-specific language format. + + Args: + language: The language to convert. + + Returns: + The service-specific language identifier, or None if not supported. 
+ """ + return Language(language) + @abstractmethod async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: """Run speech-to-text on the provided audio data. @@ -239,8 +250,9 @@ class STTService(AIService): async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply an STT settings update. - Handles ``model`` (via parent). Does **not** call ``set_language`` - — concrete services should override this method and handle language + Handles ``model`` (via parent). Translates ``Language`` enum values + before applying so the stored value is a service-specific string. + Concrete services should override this method and handle language changes (including any reconnect logic) based on the returned changed-field dict. @@ -250,6 +262,12 @@ class STTService(AIService): Returns: Dict mapping changed field names to their previous values. """ + # Translate language *before* applying so the stored value is canonical + if is_given(update.language) and isinstance(update.language, Language): + converted = self.language_to_service_language(update.language) + if converted is not None: + update.language = converted + changed = await super()._update_settings(update) return changed diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index bb5cff69f..4b4b47a50 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -448,7 +448,7 @@ class TTSService(AIService): Dict mapping changed field names to their previous values. 
""" # Translate language *before* applying so the stored value is canonical - if is_given(update.language) and update.language is not None: + if is_given(update.language) and isinstance(update.language, Language): converted = self.language_to_service_language(update.language) if converted is not None: update.language = converted diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index a67ad1cbc..d50c24eb2 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -183,7 +183,7 @@ class BaseWhisperSTTService(SegmentedSTTService): changed = await super()._update_settings(update) if "language" in changed: - self._language = self.language_to_service_language(Language(self._settings.language)) + self._language = self._settings.language if "prompt" in changed: self._prompt = self._settings.prompt if "temperature" in changed: diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index a96c26992..d4efcb166 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -319,9 +319,8 @@ class WhisperSTTService(SegmentedSTTService): # Divide by 32768 because we have signed 16-bit data. audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings.language) segments, _ = await asyncio.to_thread( - self._model.transcribe, audio_float, language=whisper_lang + self._model.transcribe, audio_float, language=self._settings.language ) text: str = "" for segment in segments: @@ -419,13 +418,12 @@ class WhisperSTTServiceMLX(WhisperSTTService): # Divide by 32768 because we have signed 16-bit data. 
audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings.language) chunk = await asyncio.to_thread( mlx_whisper.transcribe, audio_float, path_or_hf_repo=self.model_name, temperature=self._temperature, - language=whisper_lang, + language=self._settings.language, ) text: str = "" for segment in chunk.get("segments", []): From 1cec8d119dd16343e0f7294e2727ebd6045cc7b6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 14:57:38 -0500 Subject: [PATCH 014/189] Expand `language` field docstrings to clarify storage invariant. The union type reflects the input side; after construction and `_update_settings`, the stored value is always a service-specific string. --- src/pipecat/services/settings.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index fdc1c15e6..056621845 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -296,9 +296,14 @@ class TTSSettings(ServiceSettings): Parameters: model: TTS model identifier. voice: Voice identifier or name. - language: Language for speech synthesis. Accepts a ``Language`` enum - (converted to a service-specific string) or a raw string (stored - as-is). + language: Language for speech synthesis. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string. + However, the **stored** value is always a service-specific string + — ``TTSService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and ``__init__`` + methods do the same at construction time. Code that reads + ``self._settings.language`` after initialisation can treat it as + ``str``. 
""" voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -313,9 +318,14 @@ class STTSettings(ServiceSettings): Parameters: model: STT model identifier. - language: Language for speech recognition. Accepts a ``Language`` enum - (converted to a service-specific string) or a raw string (stored - as-is). + language: Language for speech recognition. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string. + However, the **stored** value is always a service-specific string + — ``STTService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and ``__init__`` + methods do the same at construction time. Code that reads + ``self._settings.language`` after initialisation can treat it as + ``str``. """ language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) From 1cad4210ce0094258db0da783aa2bc4d41f69e98 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 15:07:45 -0500 Subject: [PATCH 015/189] Deprecate dict-based `*UpdateSettingsFrame(settings={...})` code path in STT, TTS, and LLM services. The dataclass-based API (`*UpdateSettingsFrame(update=*Settings(...))`) is the preferred path since 0.0.103. The dict path still works but now emits a `DeprecationWarning`. --- src/pipecat/frames/frames.py | 6 +++++- src/pipecat/services/llm_service.py | 8 ++++++++ src/pipecat/services/stt_service.py | 8 ++++++++ src/pipecat/services/tts_service.py | 8 ++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index be3b4e4a7..0e9127130 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2117,7 +2117,11 @@ class ServiceUpdateSettingsFrame(ControlFrame): ``update`` object. When both are provided, ``update`` takes precedence. Parameters: - settings: Dictionary of setting name to value mappings (legacy). 
+ settings: Dictionary of setting name to value mappings. + + .. deprecated:: 0.0.103 + Use ``update`` with a typed settings object instead. + update: :class:`~pipecat.services.settings.ServiceSettings` object describing the delta to apply. """ diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 860a472c9..83d60defb 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -354,6 +354,14 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._update_settings(frame.update) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via LLMUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.103, use LLMUpdateSettingsFrame(update=LLMSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) update = type(self._settings).from_mapping(frame.settings) await self._update_settings(update) elif isinstance(frame, LLMContextSummaryRequestFrame): diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index ae04ed33f..fdcefcbd5 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -336,6 +336,14 @@ class STTService(AIService): await self._update_settings(frame.update) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. 
+ with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via STTUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.103, use STTUpdateSettingsFrame(update=STTSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) update = type(self._settings).from_mapping(frame.settings) await self._update_settings(update) elif isinstance(frame, STTMuteFrame): diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 4b4b47a50..996a780ed 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -546,6 +546,14 @@ class TTSService(AIService): await self._update_settings(frame.update) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via TTSUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.103, use TTSUpdateSettingsFrame(update=TTSSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) update = type(self._settings).from_mapping(frame.settings) await self._update_settings(update) elif isinstance(frame, BotStoppedSpeakingFrame): From 94a651cee2e375676a95d5097bb26ae1e2a877de Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 15:15:18 -0500 Subject: [PATCH 016/189] Remove dead `ServiceSettings.to_dict` method --- src/pipecat/services/settings.py | 12 ------------ tests/test_settings.py | 4 ---- 2 files changed, 16 deletions(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 056621845..0721cf3cd 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -228,18 +228,6 @@ class ServiceSettings: instance.extra = extra return instance - def to_dict(self) -> Dict[str, Any]: - """Serialize to a flat dictionary, including extra. - - Only given (non-``NOT_GIVEN``) values are included. 
This is the - inverse of ``from_mapping`` and useful for passing settings to APIs - that expect plain dicts. - - Returns: - A flat dictionary of all given settings. - """ - return self.given_fields() - def copy(self: _S) -> _S: """Return a deep copy of this settings instance. diff --git a/tests/test_settings.py b/tests/test_settings.py index 71d66fc35..85f89987c 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -75,10 +75,6 @@ class TestServiceSettings: result = s.given_fields() assert result == {"model": "gpt-4o", "custom_key": 42} - def test_to_dict(self): - s = ServiceSettings(model="gpt-4o") - assert s.to_dict() == {"model": "gpt-4o"} - def test_copy_is_deep(self): s = ServiceSettings(model="gpt-4o") s.extra = {"nested": {"a": 1}} From 5ea2d47d3998c9e5d85c21f3e525e36b3d7f7516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radek=20Sedl=C3=A1k?= Date: Tue, 17 Feb 2026 21:36:19 +0100 Subject: [PATCH 017/189] feat: Add support for private endpoint in Azure STT --- changelog/3764.added.md | 1 + src/pipecat/services/azure/stt.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 changelog/3764.added.md diff --git a/changelog/3764.added.md b/changelog/3764.added.md new file mode 100644 index 000000000..5da82f0c1 --- /dev/null +++ b/changelog/3764.added.md @@ -0,0 +1 @@ +- Added support for specifying private endpoints for Azure Speech-to-Text, enabling use in private networks behind firewalls. 
\ No newline at end of file diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 1bc7ec70a..cc4fcb7ae 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -63,6 +63,7 @@ class AzureSTTService(STTService): region: str, language: Language = Language.EN_US, sample_rate: Optional[int] = None, + private_endpoint: Optional[str] = None, endpoint_id: Optional[str] = None, ttfs_p99_latency: Optional[float] = AZURE_TTFS_P99, **kwargs, @@ -74,6 +75,8 @@ class AzureSTTService(STTService): region: Azure region for the Speech service (e.g., 'eastus'). language: Language for speech recognition. Defaults to English (US). sample_rate: Audio sample rate in Hz. If None, uses service default. + private_endpoint: Private endpoint for STT behind firewall. + See https://docs.azure.cn/en-us/ai-services/speech-service/speech-services-private-link?tabs=portal endpoint_id: Custom model endpoint id. ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark @@ -85,6 +88,7 @@ class AzureSTTService(STTService): subscription=api_key, region=region, speech_recognition_language=language_to_azure_language(language), + endpoint=private_endpoint, ) if endpoint_id: From 68ebd3d063eb0f0fbc39b8b70f33028a88410b7f Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 15:44:41 -0500 Subject: [PATCH 018/189] Migrate HumeTTSService to standard `TTSSettings` pattern and remove dead `TTSService.update_setting` HumeTTSService now stores its params (description, speed, trailing_silence) in a proper `HumeTTSSettings` dataclass instead of a separate `_params` Pydantic model, making it work with `TTSUpdateSettingsFrame(update=...)`. The old `update_setting(key, value)` method is kept but deprecated. Also removes the unused no-op `TTSService.update_setting` base method, which was never called by the `TTSUpdateSettingsFrame` pipeline. 
--- src/pipecat/services/hume/tts.py | 83 +++++++++++++++++++++-------- src/pipecat/services/tts_service.py | 9 ---- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 3b45cc249..27c4b417e 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -6,6 +6,8 @@ import base64 import os +import warnings +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Optional import httpx @@ -24,6 +26,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import WordTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -46,6 +49,21 @@ DEFAULT_HEADERS = { } +@dataclass +class HumeTTSSettings(TTSSettings): + """Settings for Hume TTS service. + + Parameters: + description: Natural-language acting directions (up to 100 characters). + speed: Speaking-rate multiplier (0.5-2.0). + trailing_silence: Seconds of silence to append at the end (0-5). + """ + + description: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + trailing_silence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class HumeTTSService(WordTTSService): """Hume Octave Text-to-Speech service. @@ -61,6 +79,8 @@ class HumeTTSService(WordTTSService): - Provides metrics for Time To First Byte (TTFB) and TTS usage. """ + _settings: HumeTTSSettings + class InputParams(BaseModel): """Optional synthesis parameters for Hume TTS. 
@@ -114,9 +134,14 @@ class HumeTTSService(WordTTSService): self._http_client = httpx.AsyncClient(headers=DEFAULT_HEADERS) self._client = AsyncHumeClient(api_key=api_key, httpx_client=self._http_client) - self._params = params or HumeTTSService.InputParams() - # Store voice in the base class (mirrors other services) + params = params or HumeTTSService.InputParams() + self._settings = HumeTTSSettings( + voice=voice_id, + description=params.description, + speed=params.speed, + trailing_silence=params.trailing_silence, + ) self._voice_id = voice_id self._audio_bytes = b"" @@ -183,7 +208,10 @@ class HumeTTSService(WordTTSService): await self.add_word_timestamps([("Reset", 0)]) async def update_setting(self, key: str, value: Any) -> None: - """Runtime updates via `TTSUpdateSettingsFrame`. + """Runtime updates via key/value pair. + + .. deprecated:: 0.0.103 + Use ``TTSUpdateSettingsFrame(update=HumeTTSSettings(...))`` instead. Args: key: The name of the setting to update. Recognized keys are: @@ -193,20 +221,29 @@ class HumeTTSService(WordTTSService): - "trailing_silence" value: The new value for the setting. 
""" - key_l = (key or "").lower() + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'update_setting' is deprecated, use " + "'TTSUpdateSettingsFrame(update=HumeTTSSettings(...))' instead.", + DeprecationWarning, + stacklevel=2, + ) - if key_l == "voice_id": - await self.set_voice(str(value)) - logger.debug(f"HumeTTSService voice_id set to: {self.voice}") - elif key_l == "description": - self._params.description = None if value is None else str(value) - elif key_l == "speed": - self._params.speed = None if value is None else float(value) - elif key_l == "trailing_silence": - self._params.trailing_silence = None if value is None else float(value) - else: - # Defer unknown keys to the base class - await super().update_setting(key, value) + key_l = (key or "").lower() + known_keys = {"voice_id", "voice", "description", "speed", "trailing_silence"} + + if key_l in known_keys: + kwargs: dict[str, Any] = {} + if key_l in ("voice_id", "voice"): + kwargs["voice"] = str(value) + elif key_l == "description": + kwargs["description"] = None if value is None else str(value) + elif key_l == "speed": + kwargs["speed"] = None if value is None else float(value) + elif key_l == "trailing_silence": + kwargs["trailing_silence"] = None if value is None else float(value) + await self._update_settings(HumeTTSSettings(**kwargs)) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -228,12 +265,12 @@ class HumeTTSService(WordTTSService): "text": text, "voice": PostedUtteranceVoiceWithId(id=self._voice_id), } - if self._params.description is not None: - utterance_kwargs["description"] = self._params.description - if self._params.speed is not None: - utterance_kwargs["speed"] = self._params.speed - if self._params.trailing_silence is not None: - utterance_kwargs["trailing_silence"] = self._params.trailing_silence + if self._settings.description is not None: + utterance_kwargs["description"] = 
self._settings.description + if self._settings.speed is not None: + utterance_kwargs["speed"] = self._settings.speed + if self._settings.trailing_silence is not None: + utterance_kwargs["trailing_silence"] = self._settings.trailing_silence utterance = PostedUtterance(**utterance_kwargs) @@ -257,7 +294,7 @@ class HumeTTSService(WordTTSService): # Use version "2" by default if no description is provided # Version "1" is needed when description is used - version = "1" if self._params.description is not None else "2" + version = "1" if self._settings.description is not None else "2" # Track the duration of this utterance based on the last timestamp utterance_duration = 0.0 diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 996a780ed..6cd33b3e4 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -352,15 +352,6 @@ class TTSService(AIService): return text + " " return text - async def update_setting(self, key: str, value: Any): - """Update a service-specific setting. - - Args: - key: The setting key to update. - value: The new value for the setting. - """ - pass - async def flush_audio(self): """Flush any buffered audio data.""" pass From ce51df677c7a3aebfd5e0505f4633a9b41bf4bed Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 17 Feb 2026 17:07:14 -0500 Subject: [PATCH 019/189] Add backward-compat `_aliases` and `from_mapping` overrides to TTS settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration from plain-dict `self._settings` to typed dataclasses renamed keys and flattened nested dicts. The deprecated dict-based `TTSUpdateSettingsFrame(settings={...})` code path calls `from_mapping`, which silently dropped old keys into `extra`. - Add `_aliases` so renamed flat keys (e.g. `sample_rate` → `fish_sample_rate`, camelCase Inworld keys) resolve correctly. 
- Override `from_mapping` to destructure nested dicts (`output_format`, `prosody`, `audioConfig`, `voice_setting`, `audio_setting`) into their flat field equivalents. - Fix AsyncAI constructor bug passing `output_format={...}` dict instead of individual `output_container`/`output_encoding`/`output_sample_rate` fields. --- src/pipecat/services/asyncai/tts.py | 21 ++++++++++++----- src/pipecat/services/cartesia/tts.py | 13 ++++++++++- src/pipecat/services/fish/tts.py | 14 +++++++++++- src/pipecat/services/groq/tts.py | 4 +++- src/pipecat/services/inworld/tts.py | 21 ++++++++++++++++- src/pipecat/services/minimax/tts.py | 31 +++++++++++++++++++++++++- src/pipecat/services/resembleai/tts.py | 7 +++++- 7 files changed, 99 insertions(+), 12 deletions(-) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 05ba14113..489d7cbff 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -10,7 +10,7 @@ import asyncio import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, ClassVar, Dict, Mapping, Optional import aiohttp from loguru import logger @@ -88,6 +88,17 @@ class AsyncAITTSSettings(TTSSettings): output_encoding: str = field(default_factory=lambda: NOT_GIVEN) output_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "AsyncAITTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``output_format``.""" + flat = dict(settings) + nested = flat.pop("output_format", None) + if isinstance(nested, dict): + flat.setdefault("output_container", nested.get("container")) + flat.setdefault("output_encoding", nested.get("encoding")) + flat.setdefault("output_sample_rate", nested.get("sample_rate")) + return super().from_mapping(flat) + class AsyncAITTSService(AudioContextTTSService): """Async TTS service 
with WebSocket streaming. @@ -153,11 +164,9 @@ class AsyncAITTSService(AudioContextTTSService): self._settings = AsyncAITTSSettings( model=model, voice=voice_id, - output_format={ - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, language=self.language_to_service_language(params.language) if params.language else None, diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 2544d3b98..edee9e2ea 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -11,7 +11,7 @@ import json import warnings from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator, List, Literal, Optional +from typing import Any, AsyncGenerator, ClassVar, Dict, List, Literal, Mapping, Optional from loguru import logger from pydantic import BaseModel, Field @@ -217,6 +217,17 @@ class CartesiaTTSSettings(TTSSettings): generation_config: GenerationConfig = field(default_factory=lambda: NOT_GIVEN) pronunciation_dict_id: str = field(default_factory=lambda: NOT_GIVEN) + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "CartesiaTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``output_format``.""" + flat = dict(settings) + nested = flat.pop("output_format", None) + if isinstance(nested, dict): + flat.setdefault("output_container", nested.get("container")) + flat.setdefault("output_encoding", nested.get("encoding")) + flat.setdefault("output_sample_rate", nested.get("sample_rate")) + return super().from_mapping(flat) + class CartesiaTTSService(AudioContextWordTTSService): """Cartesia TTS service with WebSocket streaming and word timestamps. 
diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 4da4b6673..7dd06d705 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -12,7 +12,7 @@ for streaming text-to-speech synthesis with customizable voice parameters. import uuid from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Literal, Optional +from typing import Any, AsyncGenerator, ClassVar, Dict, Literal, Mapping, Optional from loguru import logger from pydantic import BaseModel @@ -69,6 +69,18 @@ class FishAudioTTSSettings(TTSSettings): prosody_volume: int = field(default_factory=lambda: NOT_GIVEN) reference_id: str = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "fish_sample_rate"} + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "FishAudioTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``prosody``.""" + flat = dict(settings) + nested = flat.pop("prosody", None) + if isinstance(nested, dict): + flat.setdefault("prosody_speed", nested.get("speed")) + flat.setdefault("prosody_volume", nested.get("volume")) + return super().from_mapping(flat) + class FishAudioTTSService(InterruptibleTTSService): """Fish Audio text-to-speech service with WebSocket streaming. 
diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index d0b5fbd7c..e4c10f2e9 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -9,7 +9,7 @@ import io import wave from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import AsyncGenerator, ClassVar, Dict, Optional from loguru import logger from pydantic import BaseModel @@ -48,6 +48,8 @@ class GroqTTSSettings(TTSSettings): speed: float = field(default_factory=lambda: NOT_GIVEN) groq_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "groq_sample_rate"} + class GroqTTSService(TTSService): """Groq text-to-speech service implementation. diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index c291f3156..acc6187cb 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -17,7 +17,7 @@ import asyncio import base64 import json from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple +from typing import Any, AsyncGenerator, ClassVar, Dict, List, Mapping, Optional, Tuple import aiohttp import websockets @@ -74,6 +74,25 @@ class InworldTTSSettings(TTSSettings): auto_mode: bool = field(default_factory=lambda: NOT_GIVEN) apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = { + "voice_id": "voice", + "voiceId": "voice", + "modelId": "model", + "applyTextNormalization": "apply_text_normalization", + "autoMode": "auto_mode", + } + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "InworldTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``audioConfig``.""" + flat = dict(settings) + nested = flat.pop("audioConfig", None) + if isinstance(nested, dict): + flat.setdefault("audio_encoding", 
nested.get("audioEncoding")) + flat.setdefault("audio_sample_rate", nested.get("sampleRateHertz")) + flat.setdefault("speaking_rate", nested.get("speakingRate")) + return super().from_mapping(flat) + class InworldHttpTTSService(WordTTSService): """Inworld AI HTTP-based TTS service. diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index ab04925f3..6a107d950 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -12,7 +12,7 @@ for streaming text-to-speech synthesis. import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, ClassVar, Dict, Mapping, Optional import aiohttp from loguru import logger @@ -120,6 +120,35 @@ class MiniMaxTTSSettings(TTSSettings): audio_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) language_boost: str = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "MiniMaxTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested dicts. + + Handles ``voice_setting`` (with ``vol`` → ``volume`` rename) and + ``audio_setting`` (with prefixed field mapping). 
+ """ + flat = dict(settings) + + voice = flat.pop("voice_setting", None) + if isinstance(voice, dict): + flat.setdefault("speed", voice.get("speed")) + flat.setdefault("volume", voice.get("vol")) + flat.setdefault("pitch", voice.get("pitch")) + flat.setdefault("emotion", voice.get("emotion")) + flat.setdefault("text_normalization", voice.get("text_normalization")) + flat.setdefault("latex_read", voice.get("latex_read")) + + audio = flat.pop("audio_setting", None) + if isinstance(audio, dict): + flat.setdefault("audio_bitrate", audio.get("bitrate")) + flat.setdefault("audio_format", audio.get("format")) + flat.setdefault("audio_channel", audio.get("channel")) + flat.setdefault("audio_sample_rate", audio.get("sample_rate")) + + return super().from_mapping(flat) + class MiniMaxHttpTTSService(TTSService): """Text-to-speech service using MiniMax's T2A (Text-to-Audio) API. diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 08df23abe..acba883e4 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -9,7 +9,7 @@ import base64 import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import AsyncGenerator, ClassVar, Dict, Optional from loguru import logger @@ -54,6 +54,11 @@ class ResembleAITTSSettings(TTSSettings): output_format: str = field(default_factory=lambda: NOT_GIVEN) resemble_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = { + "voice_id": "voice", + "sample_rate": "resemble_sample_rate", + } + class ResembleAITTSService(AudioContextWordTTSService): """Resemble AI TTS service with WebSocket streaming and word timestamps. 
From d7d94a29f044be4065a35a0cc2cc119b1efe5687 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 09:46:23 -0500 Subject: [PATCH 020/189] Add foundational examples (55) for runtime settings updates via `*UpdateSettingsFrame` 42 examples covering STT (13), TTS (21), LLM (4), and realtime (4) services. Each demonstrates updating service settings 10 seconds after client connects, verifying the typed settings machinery end-to-end for every provider. --- .../55a-update-settings-deepgram-stt.py | 131 +++++++++++++++++ .../55b-update-settings-azure-stt.py | 134 +++++++++++++++++ .../55c-update-settings-google-stt.py | 131 +++++++++++++++++ .../55d-update-settings-assemblyai-stt.py | 131 +++++++++++++++++ .../55e-update-settings-gladia-stt.py | 131 +++++++++++++++++ ...update-settings-elevenlabs-realtime-stt.py | 134 +++++++++++++++++ .../55g-update-settings-elevenlabs-stt.py | 136 +++++++++++++++++ .../55h-update-settings-speechmatics-stt.py | 131 +++++++++++++++++ .../55i-update-settings-whisper-api-stt.py | 137 ++++++++++++++++++ .../55j-update-settings-sarvam-stt.py | 131 +++++++++++++++++ .../55k-update-settings-soniox-stt.py | 131 +++++++++++++++++ .../55l-update-settings-aws-transcribe-stt.py | 131 +++++++++++++++++ .../55m-update-settings-cartesia-stt.py | 131 +++++++++++++++++ .../55n-update-settings-cartesia-tts.py | 131 +++++++++++++++++ .../55o-update-settings-elevenlabs-tts.py | 130 +++++++++++++++++ .../55p-update-settings-openai-tts.py | 126 ++++++++++++++++ .../55q-update-settings-deepgram-tts.py | 127 ++++++++++++++++ .../55r-update-settings-azure-tts.py | 130 +++++++++++++++++ .../55s-update-settings-google-http-tts.py | 127 ++++++++++++++++ .../55t-update-settings-playht-tts.py | 129 +++++++++++++++++ .../55u-update-settings-rime-tts.py | 128 ++++++++++++++++ .../55v-update-settings-lmnt-tts.py | 128 ++++++++++++++++ .../55w-update-settings-fish-tts.py | 127 ++++++++++++++++ .../55x-update-settings-minimax-tts.py | 133 +++++++++++++++++ 
.../55y-update-settings-groq-tts.py | 125 ++++++++++++++++ .../55z-update-settings-hume-tts.py | 134 +++++++++++++++++ .../55za-update-settings-neuphonic-tts.py | 125 ++++++++++++++++ .../55zb-update-settings-inworld-tts.py | 129 +++++++++++++++++ .../55zc-update-settings-gemini-tts.py | 129 +++++++++++++++++ .../55zd-update-settings-aws-polly-tts.py | 127 ++++++++++++++++ .../55ze-update-settings-sarvam-tts.py | 125 ++++++++++++++++ .../55zf-update-settings-camb-tts.py | 129 +++++++++++++++++ .../55zg-update-settings-hathora-tts.py | 128 ++++++++++++++++ .../55zh-update-settings-resembleai-tts.py | 132 +++++++++++++++++ .../55zi-update-settings-openai-llm.py | 131 +++++++++++++++++ .../55zj-update-settings-anthropic-llm.py | 130 +++++++++++++++++ .../55zk-update-settings-google-llm.py | 130 +++++++++++++++++ .../55zl-update-settings-openai-realtime.py | 118 +++++++++++++++ .../55zm-update-settings-gemini-live.py | 118 +++++++++++++++ .../55zn-update-settings-ultravox-realtime.py | 124 ++++++++++++++++ .../55zo-update-settings-grok-realtime.py | 118 +++++++++++++++ .../55zp-update-settings-aws-bedrock-llm.py | 130 +++++++++++++++++ 42 files changed, 5418 insertions(+) create mode 100644 examples/foundational/55a-update-settings-deepgram-stt.py create mode 100644 examples/foundational/55b-update-settings-azure-stt.py create mode 100644 examples/foundational/55c-update-settings-google-stt.py create mode 100644 examples/foundational/55d-update-settings-assemblyai-stt.py create mode 100644 examples/foundational/55e-update-settings-gladia-stt.py create mode 100644 examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py create mode 100644 examples/foundational/55g-update-settings-elevenlabs-stt.py create mode 100644 examples/foundational/55h-update-settings-speechmatics-stt.py create mode 100644 examples/foundational/55i-update-settings-whisper-api-stt.py create mode 100644 examples/foundational/55j-update-settings-sarvam-stt.py create mode 100644 
examples/foundational/55k-update-settings-soniox-stt.py create mode 100644 examples/foundational/55l-update-settings-aws-transcribe-stt.py create mode 100644 examples/foundational/55m-update-settings-cartesia-stt.py create mode 100644 examples/foundational/55n-update-settings-cartesia-tts.py create mode 100644 examples/foundational/55o-update-settings-elevenlabs-tts.py create mode 100644 examples/foundational/55p-update-settings-openai-tts.py create mode 100644 examples/foundational/55q-update-settings-deepgram-tts.py create mode 100644 examples/foundational/55r-update-settings-azure-tts.py create mode 100644 examples/foundational/55s-update-settings-google-http-tts.py create mode 100644 examples/foundational/55t-update-settings-playht-tts.py create mode 100644 examples/foundational/55u-update-settings-rime-tts.py create mode 100644 examples/foundational/55v-update-settings-lmnt-tts.py create mode 100644 examples/foundational/55w-update-settings-fish-tts.py create mode 100644 examples/foundational/55x-update-settings-minimax-tts.py create mode 100644 examples/foundational/55y-update-settings-groq-tts.py create mode 100644 examples/foundational/55z-update-settings-hume-tts.py create mode 100644 examples/foundational/55za-update-settings-neuphonic-tts.py create mode 100644 examples/foundational/55zb-update-settings-inworld-tts.py create mode 100644 examples/foundational/55zc-update-settings-gemini-tts.py create mode 100644 examples/foundational/55zd-update-settings-aws-polly-tts.py create mode 100644 examples/foundational/55ze-update-settings-sarvam-tts.py create mode 100644 examples/foundational/55zf-update-settings-camb-tts.py create mode 100644 examples/foundational/55zg-update-settings-hathora-tts.py create mode 100644 examples/foundational/55zh-update-settings-resembleai-tts.py create mode 100644 examples/foundational/55zi-update-settings-openai-llm.py create mode 100644 examples/foundational/55zj-update-settings-anthropic-llm.py create mode 100644 
examples/foundational/55zk-update-settings-google-llm.py create mode 100644 examples/foundational/55zl-update-settings-openai-realtime.py create mode 100644 examples/foundational/55zm-update-settings-gemini-live.py create mode 100644 examples/foundational/55zn-update-settings-ultravox-realtime.py create mode 100644 examples/foundational/55zo-update-settings-grok-realtime.py create mode 100644 examples/foundational/55zp-update-settings-aws-bedrock-llm.py diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py new file mode 100644 index 000000000..bf1247ed6 --- /dev/null +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + 
+load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Deepgram STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55b-update-settings-azure-stt.py b/examples/foundational/55b-update-settings-azure-stt.py new file mode 100644 index 000000000..9ff2a5af7 --- /dev/null +++ b/examples/foundational/55b-update-settings-azure-stt.py @@ -0,0 +1,134 @@ +# +# Copyright (c) 2024-2026, Daily 
+# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.stt import AzureSTTService, AzureSTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = AzureSTTService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + 
"role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Azure STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=AzureSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55c-update-settings-google-stt.py b/examples/foundational/55c-update-settings-google-stt.py new file mode 100644 index 000000000..b25046fe3 --- /dev/null +++ b/examples/foundational/55c-update-settings-google-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
GoogleSTTService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Google STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=GoogleSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: 
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55d-update-settings-assemblyai-stt.py b/examples/foundational/55d-update-settings-assemblyai-stt.py new file mode 100644 index 000000000..488f17f54 --- /dev/null +++ b/examples/foundational/55d-update-settings-assemblyai-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = AssemblyAISTTService(api_key=os.getenv("ASSEMBLYAI_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating AssemblyAI STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=AssemblyAISTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55e-update-settings-gladia-stt.py b/examples/foundational/55e-update-settings-gladia-stt.py new file mode 100644 index 000000000..75e524d08 --- /dev/null +++ b/examples/foundational/55e-update-settings-gladia-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GladiaSTTService(api_key=os.getenv("GLADIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Gladia STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=GladiaSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py new file mode 100644 index 000000000..f74ea709e --- /dev/null +++ b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py @@ -0,0 +1,134 @@ 
+# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.elevenlabs.stt import ( + ElevenLabsRealtimeSTTService, + ElevenLabsRealtimeSTTSettings, +) +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = ElevenLabsRealtimeSTTService(api_key=os.getenv("ELEVENLABS_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating ElevenLabs Realtime STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=ElevenLabsRealtimeSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == 
"__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55g-update-settings-elevenlabs-stt.py b/examples/foundational/55g-update-settings-elevenlabs-stt.py new file mode 100644 index 000000000..dd5191218 --- /dev/null +++ b/examples/foundational/55g-update-settings-elevenlabs-stt.py @@ -0,0 +1,136 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.elevenlabs.stt import ElevenLabsSTTService, ElevenLabsSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, 
runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = ElevenLabsSTTService( + api_key=os.getenv("ELEVENLABS_API_KEY"), + aiohttp_session=session, + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating ElevenLabs STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=ElevenLabsSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + 
await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55h-update-settings-speechmatics-stt.py b/examples/foundational/55h-update-settings-speechmatics-stt.py new file mode 100644 index 000000000..82e207207 --- /dev/null +++ b/examples/foundational/55h-update-settings-speechmatics-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService, SpeechmaticsSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = SpeechmaticsSTTService(api_key=os.getenv("SPEECHMATICS_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Speechmatics STT settings: language=es") + await 
task.queue_frame( + STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(language=Language.ES)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55i-update-settings-whisper-api-stt.py b/examples/foundational/55i-update-settings-whisper-api-stt.py new file mode 100644 index 000000000..b2655bc86 --- /dev/null +++ b/examples/foundational/55i-update-settings-whisper-api-stt.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.whisper.base_stt import BaseWhisperSTTService, BaseWhisperSTTSettings +from 
pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime Whisper API STT settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, an ``STTUpdateSettingsFrame`` that
+    sets the Whisper ``prompt`` and ``temperature`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    # NOTE(review): `BaseWhisperSTTService` is named like a base class; confirm it can be
+    # instantiated and used directly here rather than through a concrete subclass.
+    stt = BaseWhisperSTTService(
+        model="whisper-1",
+        api_key=os.getenv("OPENAI_API_KEY"),
+    )
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info(
+                'Updating Whisper API STT settings: prompt="Transcribe in English", temperature=0.5'
+            )
+            await task.queue_frame(
+                STTUpdateSettingsFrame(
+                    update=BaseWhisperSTTSettings(prompt="Transcribe in English", temperature=0.5)
+                )
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55j-update-settings-sarvam-stt.py b/examples/foundational/55j-update-settings-sarvam-stt.py
new file mode 100644
index 000000000..a4d0cd192
--- /dev/null
+++
b/examples/foundational/55j-update-settings-sarvam-stt.py
@@ -0,0 +1,151 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime Sarvam STT settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, an ``STTUpdateSettingsFrame`` that
+    sets the Sarvam language to ``Language.HI`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = SarvamSTTService(api_key=os.getenv("SARVAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating Sarvam STT settings: language=hi")
+            await task.queue_frame(
+                STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.HI))
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from
pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55k-update-settings-soniox-stt.py b/examples/foundational/55k-update-settings-soniox-stt.py
new file mode 100644
index 000000000..5f192580b
--- /dev/null
+++ b/examples/foundational/55k-update-settings-soniox-stt.py
@@ -0,0 +1,151 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.soniox.stt import SonioxSTTService, SonioxSTTSettings
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime Soniox STT settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, an ``STTUpdateSettingsFrame`` that
+    sets the Soniox language to ``Language.ES`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = SonioxSTTService(api_key=os.getenv("SONIOX_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating Soniox STT settings: language=es")
+            await task.queue_frame(
+                STTUpdateSettingsFrame(update=SonioxSTTSettings(language=Language.ES))
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55l-update-settings-aws-transcribe-stt.py b/examples/foundational/55l-update-settings-aws-transcribe-stt.py
new file mode 100644
index 000000000..469efe32e
--- /dev/null
+++ b/examples/foundational/55l-update-settings-aws-transcribe-stt.py
@@ -0,0 +1,131 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.aws.stt import AWSTranscribeSTTService, AWSTranscribeSTTSettings
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+
audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime AWS Transcribe STT settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, an ``STTUpdateSettingsFrame`` that
+    sets the AWS Transcribe language to ``Language.ES`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = AWSTranscribeSTTService()
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating AWS Transcribe STT settings: language=es")
+            await task.queue_frame(
+                STTUpdateSettingsFrame(update=AWSTranscribeSTTSettings(language=Language.ES))
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55m-update-settings-cartesia-stt.py b/examples/foundational/55m-update-settings-cartesia-stt.py
new file mode 100644
index 000000000..0ed1aa169
--- /dev/null
+++ b/examples/foundational/55m-update-settings-cartesia-stt.py
@@ -0,0 +1,131 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.stt import CartesiaSTTService, CartesiaSTTSettings
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import
FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime Cartesia STT settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, an ``STTUpdateSettingsFrame`` that
+    sets the Cartesia STT language to ``Language.ES`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating Cartesia STT settings: language=es")
+            await task.queue_frame(
+                STTUpdateSettingsFrame(update=CartesiaSTTSettings(language=Language.ES))
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55n-update-settings-cartesia-tts.py b/examples/foundational/55n-update-settings-cartesia-tts.py
new file mode 100644
index 000000000..afa9bbeb5
--- /dev/null
+++ b/examples/foundational/55n-update-settings-cartesia-tts.py
@@ -0,0 +1,131 @@
+#
+# Copyright (c)
2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. 
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime Cartesia TTS settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, a ``TTSUpdateSettingsFrame`` that
+    sets the Cartesia ``speed`` to ``fast`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            user_aggregator,  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            assistant_aggregator,  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating Cartesia TTS settings: speed=fast")
+            await task.queue_frame(TTSUpdateSettingsFrame(update=CartesiaTTSSettings(speed="fast")))
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py
new file mode 100644
index 000000000..65ccfc41a
--- /dev/null
+++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py
@@ -0,0 +1,130 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import
create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime ElevenLabs TTS settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, a ``TTSUpdateSettingsFrame`` that
+    sets the ElevenLabs ``speed`` and ``stability`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = ElevenLabsTTSService(
+        api_key=os.getenv("ELEVENLABS_API_KEY"),
+        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating ElevenLabs TTS settings: speed=1.2, stability=0.3")
+            await task.queue_frame(
+                TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=1.2, stability=0.3))
+            )
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55p-update-settings-openai-tts.py b/examples/foundational/55p-update-settings-openai-tts.py
new file mode 100644
index 000000000..ebcb1cd54
--- /dev/null
+++ b/examples/foundational/55p-update-settings-openai-tts.py
@@ -0,0 +1,126 @@
+#
+#
Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Run the voice pipeline and demo a runtime OpenAI TTS settings update.
+
+    Chains transport input, STT, LLM, TTS and transport output into one pipeline
+    task. Ten seconds after a client connects, a ``TTSUpdateSettingsFrame`` that
+    sets the OpenAI TTS ``speed`` to ``1.5`` is queued on that task.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info("Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            audio_out_sample_rate=24000,
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    # The event loop holds only weak references to tasks, so keep strong references
+    # to the delayed-update tasks until they finish (or are cancelled on disconnect).
+    background_tasks = set()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+        async def update_settings():
+            await asyncio.sleep(10)
+            logger.info("Updating OpenAI TTS settings: speed=1.5")
+            await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=1.5)))
+
+        update_task = asyncio.create_task(update_settings())
+        background_tasks.add(update_task)
+        update_task.add_done_callback(background_tasks.discard)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        # Stop a still-pending delayed update before tearing the pipeline task down.
+        for pending in background_tasks:
+            pending.cancel()
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py b/examples/foundational/55q-update-settings-deepgram-tts.py
new file mode 100644
index
000000000..166f6fc26 --- /dev/null +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + 
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55r-update-settings-azure-tts.py b/examples/foundational/55r-update-settings-azure-tts.py new file mode 100644 index 000000000..5aa63bef8 --- /dev/null +++ b/examples/foundational/55r-update-settings-azure-tts.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.tts import AzureTTSService, AzureTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AzureTTSService( + 
api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating Azure TTS settings: rate="1.3", style="cheerful"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="1.3", style="cheerful")) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await 
create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55s-update-settings-google-http-tts.py b/examples/foundational/55s-update-settings-google-http-tts.py new file mode 100644 index 000000000..5033d2b28 --- /dev/null +++ b/examples/foundational/55s-update-settings-google-http-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.tts import GoogleHttpTTSService, GoogleHttpTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def 
run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GoogleHttpTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4") + await task.queue_frame( + TTSUpdateSettingsFrame(update=GoogleHttpTTSSettings(speaking_rate=1.4)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + 
+ +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55t-update-settings-playht-tts.py b/examples/foundational/55t-update-settings-playht-tts.py new file mode 100644 index 000000000..37bf48897 --- /dev/null +++ b/examples/foundational/55t-update-settings-playht-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.playht.tts import PlayHTTTSService, PlayHTTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": 
lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = PlayHTTTSService( + api_key=os.getenv("PLAYHT_API_KEY"), + user_id=os.getenv("PLAYHT_USER_ID"), + voice_url=os.getenv("PLAYHT_VOICE_URL", ""), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating PlayHT TTS settings: speed=1.3") + await task.queue_frame(TTSUpdateSettingsFrame(update=PlayHTTTSSettings(speed=1.3))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + 
logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55u-update-settings-rime-tts.py b/examples/foundational/55u-update-settings-rime-tts.py new file mode 100644 index 000000000..31de262aa --- /dev/null +++ b/examples/foundational/55u-update-settings-rime-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, 
+ audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = RimeTTSService( + api_key=os.getenv("RIME_API_KEY"), + voice_id="eva", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Rime TTS settings: speedAlpha=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(speedAlpha=1.5))) + + asyncio.create_task(update_settings()) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55v-update-settings-lmnt-tts.py b/examples/foundational/55v-update-settings-lmnt-tts.py new file mode 100644 index 000000000..f61026735 --- /dev/null +++ b/examples/foundational/55v-update-settings-lmnt-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.lmnt.tts import LmntTTSService, LmntTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + 
+load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = LmntTTSService( + api_key=os.getenv("LMNT_API_KEY"), + voice_id="lily", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating LMNT TTS settings: voice="lily"') + await 
task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="lily"))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55w-update-settings-fish-tts.py b/examples/foundational/55w-update-settings-fish-tts.py new file mode 100644 index 000000000..85d942ad7 --- /dev/null +++ b/examples/foundational/55w-update-settings-fish-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.fish.tts import FishAudioTTSService, FishAudioTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams 
+from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = FishAudioTTSService(api_key=os.getenv("FISH_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5") + await task.queue_frame( + TTSUpdateSettingsFrame(update=FishAudioTTSSettings(prosody_speed=1.5)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55x-update-settings-minimax-tts.py b/examples/foundational/55x-update-settings-minimax-tts.py new file mode 100644 index 000000000..f5d74b0f3 --- /dev/null +++ b/examples/foundational/55x-update-settings-minimax-tts.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 
2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = MiniMaxHttpTTSService( + api_key=os.getenv("MINIMAX_API_KEY", ""), + group_id=os.getenv("MINIMAX_GROUP_ID", ""), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": 
"system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=MiniMaxTTSSettings(speed=1.5, emotion="happy")) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55y-update-settings-groq-tts.py b/examples/foundational/55y-update-settings-groq-tts.py new file mode 100644 index 000000000..a7f4936ee --- /dev/null +++ b/examples/foundational/55y-update-settings-groq-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.groq.tts import GroqTTSService, GroqTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = 
GroqTTSService(api_key=os.getenv("GROQ_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Groq TTS settings: speed=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=GroqTTSSettings(speed=1.5))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if 
__name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55z-update-settings-hume-tts.py b/examples/foundational/55z-update-settings-hume-tts.py new file mode 100644 index 000000000..f4ec141ca --- /dev/null +++ b/examples/foundational/55z-update-settings-hume-tts.py @@ -0,0 +1,134 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.hume.tts import HumeTTSService, HumeTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = HumeTTSService( + api_key=os.getenv("HUME_API_KEY"), + voice_id="ee7ea9f8-c99a-4516-a65d-80235fa3acdc", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info( + 'Updating Hume TTS settings: speed=1.5, description="Speak with excitement"' + ) + await task.queue_frame( + TTSUpdateSettingsFrame( + update=HumeTTSSettings(speed=1.5, description="Speak with excitement") + ) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async 
def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55za-update-settings-neuphonic-tts.py b/examples/foundational/55za-update-settings-neuphonic-tts.py new file mode 100644 index 000000000..d76ba5c89 --- /dev/null +++ b/examples/foundational/55za-update-settings-neuphonic-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.neuphonic.tts import NeuphonicTTSService, NeuphonicTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + 
"webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = NeuphonicTTSService(api_key=os.getenv("NEUPHONIC_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Neuphonic TTS settings: speed=1.4") + await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = 
PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zb-update-settings-inworld-tts.py b/examples/foundational/55zb-update-settings-inworld-tts.py new file mode 100644 index 000000000..159d75f8c --- /dev/null +++ b/examples/foundational/55zb-update-settings-inworld-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.inworld.tts import InworldTTSService, InworldTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: 
FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = InworldTTSService(api_key=os.getenv("INWORLD_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Inworld TTS settings: speaking_rate=1.3, temperature=0.8") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=InworldTTSSettings(speaking_rate=1.3, temperature=0.8) + ) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + 
async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zc-update-settings-gemini-tts.py b/examples/foundational/55zc-update-settings-gemini-tts.py new file mode 100644 index 000000000..958115630 --- /dev/null +++ b/examples/foundational/55zc-update-settings-gemini-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.tts import GeminiTTSService, GeminiTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GeminiTTSService(api_key=os.getenv("GOOGLE_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating Gemini TTS settings: prompt="Speak slowly and dramatically"') + await task.queue_frame( + TTSUpdateSettingsFrame( + update=GeminiTTSSettings(prompt="Speak 
slowly and dramatically") + ) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zd-update-settings-aws-polly-tts.py b/examples/foundational/55zd-update-settings-aws-polly-tts.py new file mode 100644 index 000000000..951347ddd --- /dev/null +++ b/examples/foundational/55zd-update-settings-aws-polly-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.tts import AWSPollyTTSService, AWSPollyTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from 
pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AWSPollyTTSService() + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating AWS Polly TTS settings: rate="fast", pitch="+10%"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast", pitch="+10%")) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55ze-update-settings-sarvam-tts.py b/examples/foundational/55ze-update-settings-sarvam-tts.py new file mode 100644 index 000000000..3674d2767 --- /dev/null +++ b/examples/foundational/55ze-update-settings-sarvam-tts.py @@ -0,0 +1,125 @@ +# +# 
Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.sarvam.tts import SarvamTTSService, SarvamWSTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = SarvamTTSService(api_key=os.getenv("SARVAM_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Sarvam TTS settings: pace=1.3") + await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamWSTTSSettings(pace=1.3))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zf-update-settings-camb-tts.py b/examples/foundational/55zf-update-settings-camb-tts.py new file mode 100644 index 000000000..cc629ae03 --- /dev/null +++ 
b/examples/foundational/55zf-update-settings-camb-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.camb.tts import CambTTSService, CambTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CambTTSService(api_key=os.getenv("CAMB_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info('Updating Camb TTS settings: user_instructions="Speak enthusiastically"') + await task.queue_frame( + TTSUpdateSettingsFrame( + update=CambTTSSettings(user_instructions="Speak enthusiastically") + ) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55zg-update-settings-hathora-tts.py b/examples/foundational/55zg-update-settings-hathora-tts.py new file mode 100644 index 000000000..2c59029b8 --- /dev/null +++ b/examples/foundational/55zg-update-settings-hathora-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.hathora.tts import HathoraTTSService, HathoraTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = HathoraTTSService( + 
api_key=os.getenv("HATHORA_API_KEY"), + model="hathora-ai/polar", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Hathora TTS settings: speed=1.3") + await task.queue_frame(TTSUpdateSettingsFrame(update=HathoraTTSSettings(speed=1.3))) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, 
runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zh-update-settings-resembleai-tts.py b/examples/foundational/55zh-update-settings-resembleai-tts.py new file mode 100644 index 000000000..39ab30eee --- /dev/null +++ b/examples/foundational/55zh-update-settings-resembleai-tts.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.resembleai.tts import ResembleAITTSService, ResembleAITTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + 
logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = ResembleAITTSService( + api_key=os.getenv("RESEMBLEAI_API_KEY"), + voice_id=os.getenv("RESEMBLEAI_VOICE_ID", ""), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating ResembleAI TTS settings: voice (changed)") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=ResembleAITTSSettings(voice=os.getenv("RESEMBLEAI_VOICE_ID_ALT", "")) + ) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await 
runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zi-update-settings-openai-llm.py b/examples/foundational/55zi-update-settings-openai-llm.py new file mode 100644 index 000000000..d4befeddb --- /dev/null +++ b/examples/foundational/55zi-update-settings-openai-llm.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: 
FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating OpenAI LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zj-update-settings-anthropic-llm.py b/examples/foundational/55zj-update-settings-anthropic-llm.py new file mode 100644 index 000000000..e2a2af3db --- /dev/null +++ b/examples/foundational/55zj-update-settings-anthropic-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.anthropic.llm import AnthropicLLMService, AnthropicLLMSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Anthropic LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=AnthropicLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zk-update-settings-google-llm.py b/examples/foundational/55zk-update-settings-google-llm.py new file mode 100644 index 000000000..6d7ba9573 --- /dev/null +++ b/examples/foundational/55zk-update-settings-google-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, 
Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Google LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zl-update-settings-openai-realtime.py 
b/examples/foundational/55zl-update-settings-openai-realtime.py new file mode 100644 index 000000000..9f6daadfd --- /dev/null +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -0,0 +1,118 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = OpenAIRealtimeLLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=OpenAIRealtimeLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zm-update-settings-gemini-live.py b/examples/foundational/55zm-update-settings-gemini-live.py new file mode 100644 index 000000000..0a5b38529 --- /dev/null +++ b/examples/foundational/55zm-update-settings-gemini-live.py @@ -0,0 +1,118 @@ +# +# 
Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.google.gemini_live.llm import ( + GeminiLiveLLMService, + GeminiLiveLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GeminiLiveLLMService(api_key=os.getenv("GOOGLE_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Gemini Live LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zn-update-settings-ultravox-realtime.py b/examples/foundational/55zn-update-settings-ultravox-realtime.py new file mode 100644 index 000000000..7fcb25d83 --- /dev/null +++ b/examples/foundational/55zn-update-settings-ultravox-realtime.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from 
pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.ultravox.llm import ( + OneShotInputParams, + UltravoxRealtimeLLMService, + UltravoxRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = UltravoxRealtimeLLMService( + params=OneShotInputParams( + api_key=os.getenv("ULTRAVOX_API_KEY"), + system_prompt="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + ), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Ultravox Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zo-update-settings-grok-realtime.py b/examples/foundational/55zo-update-settings-grok-realtime.py new file mode 100644 index 000000000..6366ee933 --- /dev/null +++ b/examples/foundational/55zo-update-settings-grok-realtime.py @@ -0,0 +1,118 @@ +# 
+# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.grok.realtime.llm import ( + GrokRealtimeLLMService, + GrokRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GrokRealtimeLLMService(api_key=os.getenv("XAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating Grok Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GrokRealtimeLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py new file mode 100644 index 000000000..0b26774ae --- /dev/null +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from 
pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = AWSBedrockLLMService(model="anthropic.claude-sonnet-4-20250514-v1:0") + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + async def update_settings(): + await asyncio.sleep(10) + logger.info("Updating AWS Bedrock LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=AWSBedrockLLMSettings(temperature=0.1)) + ) + + asyncio.create_task(update_settings()) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() From 7910f20e144b1bcf64707ede9066780f62bad69f Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 10:07:33 -0500 Subject: [PATCH 021/189] Update comment in 
Azure STT explaining how we could support dynamic settings updates in the future --- src/pipecat/services/azure/stt.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 7f9d3f1ba..18fc9b108 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -146,8 +146,10 @@ class AzureSTTService(STTService): # if "language" in changed: # self._speech_config.speech_recognition_language = self._settings.language # if self._speech_recognizer: - # self._speech_recognizer.stop_continuous_recognition_async() - # self._speech_recognizer.start_continuous_recognition_async() + # # Requires refactoring to set up and tear down recognizer, as + # # language is applied at recognizer initialization + # await self._disconnect() + # await self._connect() self._warn_unhandled_updated_settings(changed) From a7ada79fd9e138399bc0710c9618b37461deb6b7 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 10:50:53 -0500 Subject: [PATCH 022/189] Fix `ElevenLabsRealtimeSTTService`: - Move `CommitStrategy` up in the file so it could be used by `ElevenLabsRealtimeSTTSettings` - Fix a bug where `run_stt` would erroneously try to reconnect if a reconnection was already in flight (like a reconnection triggered by `_update_settings`) --- src/pipecat/services/elevenlabs/stt.py | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 0ef137006..e5b7b3843 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -11,6 +11,7 @@ using segmented audio processing. The service uploads audio files and receives transcription results directly. 
""" +import asyncio import base64 import io import json @@ -169,6 +170,13 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +class CommitStrategy(str, Enum): + """Commit strategies for transcript segmentation.""" + + MANUAL = "manual" + VAD = "vad" + + @dataclass class ElevenLabsSTTSettings(STTSettings): """Settings for the ElevenLabs file-based STT service. @@ -426,13 +434,6 @@ def audio_format_from_sample_rate(sample_rate: int) -> str: return "pcm_16000" -class CommitStrategy(str, Enum): - """Commit strategies for transcript segmentation.""" - - MANUAL = "manual" - VAD = "vad" - - class ElevenLabsRealtimeSTTService(WebsocketSTTService): """Speech-to-text service using ElevenLabs' Realtime WebSocket API. @@ -515,6 +516,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): self._audio_format = "" # initialized in start() self._receive_task = None + self._connected_event = asyncio.Event() + self._connected_event.set() + self._settings = ElevenLabsRealtimeSTTSettings( model=model, language=params.language_code, @@ -630,6 +634,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Yields: None - transcription results are handled via WebSocket responses. 
""" + # Wait for any in-flight _connect() to finish before checking state + await self._connected_event.wait() + # Reconnect if connection is closed if not self._websocket or self._websocket.state is State.CLOSED: await self._connect() @@ -654,12 +661,18 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): async def _connect(self): """Establish WebSocket connection to ElevenLabs Realtime STT.""" - await self._connect_websocket() + self._connected_event.clear() + try: + await self._connect_websocket() - await super()._connect() + await super()._connect() - if self._websocket and not self._receive_task: - self._receive_task = self.create_task(self._receive_task_handler(self._report_error)) + if self._websocket and not self._receive_task: + self._receive_task = self.create_task( + self._receive_task_handler(self._report_error) + ) + finally: + self._connected_event.set() async def _disconnect(self): """Close WebSocket connection and cleanup tasks.""" From e98bb1df66889bbf651a41d63ec522ff04a8269f Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 11:06:33 -0500 Subject: [PATCH 023/189] Simplify 55* examples: inline the settings update directly in the on_client_connected handler instead of wrapping it in a separate async task --- .../55a-update-settings-deepgram-stt.py | 13 +++++-------- .../55b-update-settings-azure-stt.py | 13 +++++-------- .../55c-update-settings-google-stt.py | 13 +++++-------- .../55d-update-settings-assemblyai-stt.py | 13 +++++-------- .../55e-update-settings-gladia-stt.py | 13 +++++-------- ...update-settings-elevenlabs-realtime-stt.py | 13 +++++-------- .../55g-update-settings-elevenlabs-stt.py | 13 +++++-------- .../55h-update-settings-speechmatics-stt.py | 13 +++++-------- .../55i-update-settings-whisper-api-stt.py | 19 ++++++++----------- .../55j-update-settings-sarvam-stt.py | 13 +++++-------- .../55k-update-settings-soniox-stt.py | 13 +++++-------- .../55l-update-settings-aws-transcribe-stt.py | 13 +++++-------- 
.../55m-update-settings-cartesia-stt.py | 13 +++++-------- .../55n-update-settings-cartesia-tts.py | 9 +++------ .../55o-update-settings-elevenlabs-tts.py | 13 +++++-------- .../55p-update-settings-openai-tts.py | 9 +++------ .../55q-update-settings-deepgram-tts.py | 13 +++++-------- .../55r-update-settings-azure-tts.py | 13 +++++-------- .../55s-update-settings-google-http-tts.py | 13 +++++-------- .../55t-update-settings-playht-tts.py | 9 +++------ .../55u-update-settings-rime-tts.py | 9 +++------ .../55v-update-settings-lmnt-tts.py | 9 +++------ .../55w-update-settings-fish-tts.py | 13 +++++-------- .../55x-update-settings-minimax-tts.py | 13 +++++-------- .../55y-update-settings-groq-tts.py | 9 +++------ .../55z-update-settings-hume-tts.py | 17 ++++++----------- .../55za-update-settings-neuphonic-tts.py | 9 +++------ .../55zb-update-settings-inworld-tts.py | 15 +++++---------- .../55zc-update-settings-gemini-tts.py | 15 +++++---------- .../55zd-update-settings-aws-polly-tts.py | 13 +++++-------- .../55ze-update-settings-sarvam-tts.py | 9 +++------ .../55zf-update-settings-camb-tts.py | 15 ++++++--------- .../55zg-update-settings-hathora-tts.py | 9 +++------ .../55zh-update-settings-resembleai-tts.py | 15 ++++++--------- .../55zi-update-settings-openai-llm.py | 11 +++-------- .../55zj-update-settings-anthropic-llm.py | 11 +++-------- .../55zk-update-settings-google-llm.py | 11 +++-------- .../55zl-update-settings-openai-realtime.py | 13 +++++-------- .../55zm-update-settings-gemini-live.py | 13 +++++-------- .../55zn-update-settings-ultravox-realtime.py | 13 +++++-------- .../55zo-update-settings-grok-realtime.py | 13 +++++-------- .../55zp-update-settings-aws-bedrock-llm.py | 13 +++++-------- 42 files changed, 192 insertions(+), 330 deletions(-) diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py index bf1247ed6..aea9475a8 100644 --- 
a/examples/foundational/55a-update-settings-deepgram-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Deepgram STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Deepgram STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55b-update-settings-azure-stt.py b/examples/foundational/55b-update-settings-azure-stt.py index 9ff2a5af7..7fd0d2ca4 100644 --- a/examples/foundational/55b-update-settings-azure-stt.py +++ b/examples/foundational/55b-update-settings-azure-stt.py @@ -103,14 +103,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Azure STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=AzureSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Azure STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=AzureSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git 
a/examples/foundational/55c-update-settings-google-stt.py b/examples/foundational/55c-update-settings-google-stt.py index b25046fe3..dd33bfe75 100644 --- a/examples/foundational/55c-update-settings-google-stt.py +++ b/examples/foundational/55c-update-settings-google-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Google STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=GoogleSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Google STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=GoogleSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55d-update-settings-assemblyai-stt.py b/examples/foundational/55d-update-settings-assemblyai-stt.py index 488f17f54..6d6a2532e 100644 --- a/examples/foundational/55d-update-settings-assemblyai-stt.py +++ b/examples/foundational/55d-update-settings-assemblyai-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating AssemblyAI STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=AssemblyAISTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating AssemblyAI STT settings: language=es") + await task.queue_frame( + 
STTUpdateSettingsFrame(update=AssemblyAISTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55e-update-settings-gladia-stt.py b/examples/foundational/55e-update-settings-gladia-stt.py index 75e524d08..a2c6f21fe 100644 --- a/examples/foundational/55e-update-settings-gladia-stt.py +++ b/examples/foundational/55e-update-settings-gladia-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Gladia STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=GladiaSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Gladia STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=GladiaSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py index f74ea709e..9aee04fbb 100644 --- a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py +++ b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py @@ -103,14 +103,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating ElevenLabs Realtime STT settings: language=es") - await task.queue_frame( - 
STTUpdateSettingsFrame(update=ElevenLabsRealtimeSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating ElevenLabs Realtime STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=ElevenLabsRealtimeSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55g-update-settings-elevenlabs-stt.py b/examples/foundational/55g-update-settings-elevenlabs-stt.py index dd5191218..33844935a 100644 --- a/examples/foundational/55g-update-settings-elevenlabs-stt.py +++ b/examples/foundational/55g-update-settings-elevenlabs-stt.py @@ -105,14 +105,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating ElevenLabs STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=ElevenLabsSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating ElevenLabs STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=ElevenLabsSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55h-update-settings-speechmatics-stt.py b/examples/foundational/55h-update-settings-speechmatics-stt.py index 82e207207..46ed44016 100644 --- a/examples/foundational/55h-update-settings-speechmatics-stt.py +++ b/examples/foundational/55h-update-settings-speechmatics-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": 
"Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Speechmatics STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Speechmatics STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55i-update-settings-whisper-api-stt.py b/examples/foundational/55i-update-settings-whisper-api-stt.py index b2655bc86..27581a819 100644 --- a/examples/foundational/55i-update-settings-whisper-api-stt.py +++ b/examples/foundational/55i-update-settings-whisper-api-stt.py @@ -102,18 +102,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info( - 'Updating Whisper API STT settings: prompt="Transcribe in English", temperature=0.5' + await asyncio.sleep(10) + logger.info( + 'Updating Whisper API STT settings: prompt="Transcribe in English", temperature=0.5' + ) + await task.queue_frame( + STTUpdateSettingsFrame( + update=BaseWhisperSTTSettings(prompt="Transcribe in English", temperature=0.5) ) - await task.queue_frame( - STTUpdateSettingsFrame( - update=BaseWhisperSTTSettings(prompt="Transcribe in English", temperature=0.5) - ) - ) - - asyncio.create_task(update_settings()) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55j-update-settings-sarvam-stt.py 
b/examples/foundational/55j-update-settings-sarvam-stt.py index a4d0cd192..b7f619987 100644 --- a/examples/foundational/55j-update-settings-sarvam-stt.py +++ b/examples/foundational/55j-update-settings-sarvam-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Sarvam STT settings: language=hi") - await task.queue_frame( - STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.HI)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Sarvam STT settings: language=hi") + await task.queue_frame( + STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.HI)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55k-update-settings-soniox-stt.py b/examples/foundational/55k-update-settings-soniox-stt.py index 5f192580b..2cbcd44f4 100644 --- a/examples/foundational/55k-update-settings-soniox-stt.py +++ b/examples/foundational/55k-update-settings-soniox-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Soniox STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=SonioxSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Soniox STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=SonioxSTTSettings(language=Language.ES)) + ) 
@transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55l-update-settings-aws-transcribe-stt.py b/examples/foundational/55l-update-settings-aws-transcribe-stt.py index 469efe32e..0f4c18981 100644 --- a/examples/foundational/55l-update-settings-aws-transcribe-stt.py +++ b/examples/foundational/55l-update-settings-aws-transcribe-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating AWS Transcribe STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=AWSTranscribeSTTSettings(language=Language.ES)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating AWS Transcribe STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=AWSTranscribeSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55m-update-settings-cartesia-stt.py b/examples/foundational/55m-update-settings-cartesia-stt.py index 0ed1aa169..6ba27a85e 100644 --- a/examples/foundational/55m-update-settings-cartesia-stt.py +++ b/examples/foundational/55m-update-settings-cartesia-stt.py @@ -100,14 +100,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Cartesia STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=CartesiaSTTSettings(language=Language.ES)) - ) - - 
asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Cartesia STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=CartesiaSTTSettings(language=Language.ES)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55n-update-settings-cartesia-tts.py b/examples/foundational/55n-update-settings-cartesia-tts.py index afa9bbeb5..38070765c 100644 --- a/examples/foundational/55n-update-settings-cartesia-tts.py +++ b/examples/foundational/55n-update-settings-cartesia-tts.py @@ -102,12 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Cartesia TTS settings: speed=fast") - await task.queue_frame(TTSUpdateSettingsFrame(update=CartesiaTTSSettings(speed="fast"))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Cartesia TTS settings: speed=fast") + await task.queue_frame(TTSUpdateSettingsFrame(update=CartesiaTTSSettings(speed="fast"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py index 65ccfc41a..0ca72ba5b 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -99,14 +99,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating 
ElevenLabs TTS settings: speed=1.2, stability=0.3") - await task.queue_frame( - TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=1.2, stability=0.3)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: speed=1.2, stability=0.3") + await task.queue_frame( + TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=1.2, stability=0.3)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55p-update-settings-openai-tts.py b/examples/foundational/55p-update-settings-openai-tts.py index ebcb1cd54..58e8efde1 100644 --- a/examples/foundational/55p-update-settings-openai-tts.py +++ b/examples/foundational/55p-update-settings-openai-tts.py @@ -97,12 +97,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating OpenAI TTS settings: speed=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=1.5))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating OpenAI TTS settings: speed=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py b/examples/foundational/55q-update-settings-deepgram-tts.py index 166f6fc26..4b7f50ae8 100644 --- a/examples/foundational/55q-update-settings-deepgram-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -96,14 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce 
yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55r-update-settings-azure-tts.py b/examples/foundational/55r-update-settings-azure-tts.py index 5aa63bef8..076901707 100644 --- a/examples/foundational/55r-update-settings-azure-tts.py +++ b/examples/foundational/55r-update-settings-azure-tts.py @@ -99,14 +99,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating Azure TTS settings: rate="1.3", style="cheerful"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="1.3", style="cheerful")) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating Azure TTS settings: rate="1.3", style="cheerful"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="1.3", style="cheerful")) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55s-update-settings-google-http-tts.py b/examples/foundational/55s-update-settings-google-http-tts.py index 5033d2b28..6c302411a 100644 --- a/examples/foundational/55s-update-settings-google-http-tts.py 
+++ b/examples/foundational/55s-update-settings-google-http-tts.py @@ -96,14 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4") - await task.queue_frame( - TTSUpdateSettingsFrame(update=GoogleHttpTTSSettings(speaking_rate=1.4)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4") + await task.queue_frame( + TTSUpdateSettingsFrame(update=GoogleHttpTTSSettings(speaking_rate=1.4)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55t-update-settings-playht-tts.py b/examples/foundational/55t-update-settings-playht-tts.py index 37bf48897..ec468a81c 100644 --- a/examples/foundational/55t-update-settings-playht-tts.py +++ b/examples/foundational/55t-update-settings-playht-tts.py @@ -100,12 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating PlayHT TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=PlayHTTTSSettings(speed=1.3))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating PlayHT TTS settings: speed=1.3") + await task.queue_frame(TTSUpdateSettingsFrame(update=PlayHTTTSSettings(speed=1.3))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55u-update-settings-rime-tts.py 
b/examples/foundational/55u-update-settings-rime-tts.py index 31de262aa..e95aeb830 100644 --- a/examples/foundational/55u-update-settings-rime-tts.py +++ b/examples/foundational/55u-update-settings-rime-tts.py @@ -99,12 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Rime TTS settings: speedAlpha=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(speedAlpha=1.5))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Rime TTS settings: speedAlpha=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(speedAlpha=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55v-update-settings-lmnt-tts.py b/examples/foundational/55v-update-settings-lmnt-tts.py index f61026735..c8e6a3e09 100644 --- a/examples/foundational/55v-update-settings-lmnt-tts.py +++ b/examples/foundational/55v-update-settings-lmnt-tts.py @@ -99,12 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating LMNT TTS settings: voice="lily"') - await task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="lily"))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating LMNT TTS settings: voice="lily"') + await task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="lily"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git 
a/examples/foundational/55w-update-settings-fish-tts.py b/examples/foundational/55w-update-settings-fish-tts.py index 85d942ad7..be9049333 100644 --- a/examples/foundational/55w-update-settings-fish-tts.py +++ b/examples/foundational/55w-update-settings-fish-tts.py @@ -96,14 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5") - await task.queue_frame( - TTSUpdateSettingsFrame(update=FishAudioTTSSettings(prosody_speed=1.5)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5") + await task.queue_frame( + TTSUpdateSettingsFrame(update=FishAudioTTSSettings(prosody_speed=1.5)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55x-update-settings-minimax-tts.py b/examples/foundational/55x-update-settings-minimax-tts.py index f5d74b0f3..306b8f2bd 100644 --- a/examples/foundational/55x-update-settings-minimax-tts.py +++ b/examples/foundational/55x-update-settings-minimax-tts.py @@ -102,14 +102,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=MiniMaxTTSSettings(speed=1.5, emotion="happy")) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"') + await task.queue_frame( + 
TTSUpdateSettingsFrame(update=MiniMaxTTSSettings(speed=1.5, emotion="happy")) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55y-update-settings-groq-tts.py b/examples/foundational/55y-update-settings-groq-tts.py index a7f4936ee..e6ce851c6 100644 --- a/examples/foundational/55y-update-settings-groq-tts.py +++ b/examples/foundational/55y-update-settings-groq-tts.py @@ -96,12 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Groq TTS settings: speed=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=GroqTTSSettings(speed=1.5))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Groq TTS settings: speed=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=GroqTTSSettings(speed=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55z-update-settings-hume-tts.py b/examples/foundational/55z-update-settings-hume-tts.py index f4ec141ca..abab6abec 100644 --- a/examples/foundational/55z-update-settings-hume-tts.py +++ b/examples/foundational/55z-update-settings-hume-tts.py @@ -99,18 +99,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info( - 'Updating Hume TTS settings: speed=1.5, description="Speak with excitement"' + await asyncio.sleep(10) + logger.info('Updating Hume TTS settings: speed=1.5, description="Speak with excitement"') + await task.queue_frame( + 
TTSUpdateSettingsFrame( + update=HumeTTSSettings(speed=1.5, description="Speak with excitement") ) - await task.queue_frame( - TTSUpdateSettingsFrame( - update=HumeTTSSettings(speed=1.5, description="Speak with excitement") - ) - ) - - asyncio.create_task(update_settings()) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55za-update-settings-neuphonic-tts.py b/examples/foundational/55za-update-settings-neuphonic-tts.py index d76ba5c89..187594c7e 100644 --- a/examples/foundational/55za-update-settings-neuphonic-tts.py +++ b/examples/foundational/55za-update-settings-neuphonic-tts.py @@ -96,12 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Neuphonic TTS settings: speed=1.4") - await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Neuphonic TTS settings: speed=1.4") + await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zb-update-settings-inworld-tts.py b/examples/foundational/55zb-update-settings-inworld-tts.py index 159d75f8c..d9947c196 100644 --- a/examples/foundational/55zb-update-settings-inworld-tts.py +++ b/examples/foundational/55zb-update-settings-inworld-tts.py @@ -96,16 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await 
asyncio.sleep(10) - logger.info("Updating Inworld TTS settings: speaking_rate=1.3, temperature=0.8") - await task.queue_frame( - TTSUpdateSettingsFrame( - update=InworldTTSSettings(speaking_rate=1.3, temperature=0.8) - ) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Inworld TTS settings: speaking_rate=1.3, temperature=0.8") + await task.queue_frame( + TTSUpdateSettingsFrame(update=InworldTTSSettings(speaking_rate=1.3, temperature=0.8)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zc-update-settings-gemini-tts.py b/examples/foundational/55zc-update-settings-gemini-tts.py index 958115630..0bf878871 100644 --- a/examples/foundational/55zc-update-settings-gemini-tts.py +++ b/examples/foundational/55zc-update-settings-gemini-tts.py @@ -96,16 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating Gemini TTS settings: prompt="Speak slowly and dramatically"') - await task.queue_frame( - TTSUpdateSettingsFrame( - update=GeminiTTSSettings(prompt="Speak slowly and dramatically") - ) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating Gemini TTS settings: prompt="Speak slowly and dramatically"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=GeminiTTSSettings(prompt="Speak slowly and dramatically")) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zd-update-settings-aws-polly-tts.py b/examples/foundational/55zd-update-settings-aws-polly-tts.py index 951347ddd..8abf68e2c 100644 --- a/examples/foundational/55zd-update-settings-aws-polly-tts.py 
+++ b/examples/foundational/55zd-update-settings-aws-polly-tts.py @@ -96,14 +96,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating AWS Polly TTS settings: rate="fast", pitch="+10%"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast", pitch="+10%")) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info('Updating AWS Polly TTS settings: rate="fast", pitch="+10%"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast", pitch="+10%")) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55ze-update-settings-sarvam-tts.py b/examples/foundational/55ze-update-settings-sarvam-tts.py index 3674d2767..07065bfaf 100644 --- a/examples/foundational/55ze-update-settings-sarvam-tts.py +++ b/examples/foundational/55ze-update-settings-sarvam-tts.py @@ -96,12 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Sarvam TTS settings: pace=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamWSTTSSettings(pace=1.3))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Sarvam TTS settings: pace=1.3") + await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamWSTTSSettings(pace=1.3))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zf-update-settings-camb-tts.py 
b/examples/foundational/55zf-update-settings-camb-tts.py index cc629ae03..0b663ef64 100644 --- a/examples/foundational/55zf-update-settings-camb-tts.py +++ b/examples/foundational/55zf-update-settings-camb-tts.py @@ -96,16 +96,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info('Updating Camb TTS settings: user_instructions="Speak enthusiastically"') - await task.queue_frame( - TTSUpdateSettingsFrame( - update=CambTTSSettings(user_instructions="Speak enthusiastically") - ) + await asyncio.sleep(10) + logger.info('Updating Camb TTS settings: user_instructions="Speak enthusiastically"') + await task.queue_frame( + TTSUpdateSettingsFrame( + update=CambTTSSettings(user_instructions="Speak enthusiastically") ) - - asyncio.create_task(update_settings()) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zg-update-settings-hathora-tts.py b/examples/foundational/55zg-update-settings-hathora-tts.py index 2c59029b8..363ac7d85 100644 --- a/examples/foundational/55zg-update-settings-hathora-tts.py +++ b/examples/foundational/55zg-update-settings-hathora-tts.py @@ -99,12 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Hathora TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=HathoraTTSSettings(speed=1.3))) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Hathora TTS settings: speed=1.3") + await 
task.queue_frame(TTSUpdateSettingsFrame(update=HathoraTTSSettings(speed=1.3))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zh-update-settings-resembleai-tts.py b/examples/foundational/55zh-update-settings-resembleai-tts.py index 39ab30eee..10d750394 100644 --- a/examples/foundational/55zh-update-settings-resembleai-tts.py +++ b/examples/foundational/55zh-update-settings-resembleai-tts.py @@ -99,16 +99,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating ResembleAI TTS settings: voice (changed)") - await task.queue_frame( - TTSUpdateSettingsFrame( - update=ResembleAITTSSettings(voice=os.getenv("RESEMBLEAI_VOICE_ID_ALT", "")) - ) + await asyncio.sleep(10) + logger.info("Updating ResembleAI TTS settings: voice (changed)") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=ResembleAITTSSettings(voice=os.getenv("RESEMBLEAI_VOICE_ID_ALT", "")) ) - - asyncio.create_task(update_settings()) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zi-update-settings-openai-llm.py b/examples/foundational/55zi-update-settings-openai-llm.py index d4befeddb..a8c253bc2 100644 --- a/examples/foundational/55zi-update-settings-openai-llm.py +++ b/examples/foundational/55zi-update-settings-openai-llm.py @@ -100,14 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating OpenAI LLM settings: temperature=0.1") - await 
task.queue_frame( - LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating OpenAI LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zj-update-settings-anthropic-llm.py b/examples/foundational/55zj-update-settings-anthropic-llm.py index e2a2af3db..4c8341a6a 100644 --- a/examples/foundational/55zj-update-settings-anthropic-llm.py +++ b/examples/foundational/55zj-update-settings-anthropic-llm.py @@ -99,14 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Anthropic LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=AnthropicLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Anthropic LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=AnthropicLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zk-update-settings-google-llm.py b/examples/foundational/55zk-update-settings-google-llm.py index 6d7ba9573..140c0fccb 100644 --- a/examples/foundational/55zk-update-settings-google-llm.py +++ b/examples/foundational/55zk-update-settings-google-llm.py @@ -99,14 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await 
task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Google LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Google LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zl-update-settings-openai-realtime.py b/examples/foundational/55zl-update-settings-openai-realtime.py index 9f6daadfd..2207b6851 100644 --- a/examples/foundational/55zl-update-settings-openai-realtime.py +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -87,14 +87,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating OpenAI Realtime LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=OpenAIRealtimeLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=OpenAIRealtimeLLMSettings(temperature=0.1)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zm-update-settings-gemini-live.py b/examples/foundational/55zm-update-settings-gemini-live.py index 0a5b38529..8ad635fd5 100644 --- a/examples/foundational/55zm-update-settings-gemini-live.py +++ b/examples/foundational/55zm-update-settings-gemini-live.py @@ -87,14 +87,11 @@ async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Gemini Live LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Gemini Live LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zn-update-settings-ultravox-realtime.py b/examples/foundational/55zn-update-settings-ultravox-realtime.py index 7fcb25d83..8c640ccc4 100644 --- a/examples/foundational/55zn-update-settings-ultravox-realtime.py +++ b/examples/foundational/55zn-update-settings-ultravox-realtime.py @@ -93,14 +93,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Ultravox Realtime LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Ultravox Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(temperature=0.1)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zo-update-settings-grok-realtime.py b/examples/foundational/55zo-update-settings-grok-realtime.py index 6366ee933..567eeae2b 100644 --- 
a/examples/foundational/55zo-update-settings-grok-realtime.py +++ b/examples/foundational/55zo-update-settings-grok-realtime.py @@ -87,14 +87,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating Grok Realtime LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=GrokRealtimeLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating Grok Realtime LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GrokRealtimeLLMSettings(temperature=0.1)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py index 0b26774ae..aaf0d973c 100644 --- a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -99,14 +99,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - async def update_settings(): - await asyncio.sleep(10) - logger.info("Updating AWS Bedrock LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=AWSBedrockLLMSettings(temperature=0.1)) - ) - - asyncio.create_task(update_settings()) + await asyncio.sleep(10) + logger.info("Updating AWS Bedrock LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=AWSBedrockLLMSettings(temperature=0.1)) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): From 
d913d954dbf65d4e9bd2d734704b8427fd9fadd2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 11:34:52 -0500 Subject: [PATCH 024/189] Fix `SpeechmaticsSTTService` settings update code, and augment test file to better exercise it --- .../55h-update-settings-speechmatics-stt.py | 27 ++++++++++++++++++- src/pipecat/services/speechmatics/stt.py | 20 +++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/examples/foundational/55h-update-settings-speechmatics-stt.py b/examples/foundational/55h-update-settings-speechmatics-stt.py index 46ed44016..d041d69d2 100644 --- a/examples/foundational/55h-update-settings-speechmatics-stt.py +++ b/examples/foundational/55h-update-settings-speechmatics-stt.py @@ -51,7 +51,14 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = SpeechmaticsSTTService(api_key=os.getenv("SPEECHMATICS_API_KEY")) + stt = SpeechmaticsSTTService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + params=SpeechmaticsSTTService.InputParams( + enable_diarization=True, + speaker_active_format="<{speaker_id}>{text}", + speaker_passive_format="<{speaker_id}>{text}", + ), + ) tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), @@ -106,6 +113,24 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(language=Language.ES)) ) + await asyncio.sleep(10) + logger.info("Updating Speechmatics STT settings: focus_speakers=['S1']") + await task.queue_frame( + STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(focus_speakers=["S1"])) + ) + + await asyncio.sleep(10) + logger.info( + "Updating Speechmatics STT settings: speaker_active_format={text}" + ) + await task.queue_frame( + STTUpdateSettingsFrame( + update=SpeechmaticsSTTSettings( + speaker_active_format="{text}" + ) + ) + ) + @transport.event_handler("on_client_disconnected") async def 
on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index c6fe0d16e..166e19d97 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -508,12 +508,14 @@ class SpeechmaticsSTTService(STTService): needs_reconnect = bool(changed.keys() - no_reconnect) if needs_reconnect: + logger.debug(f"{self} settings update requires reconnect: {changed.keys()}") # Connection-level fields changed — rebuild the SDK config # from the now-updated self._settings, then reconnect. self._config = self._build_config() await self._disconnect() await self._connect() - elif changed & SpeechmaticsSTTSettings.HOT_FIELDS: + elif changed.keys() & SpeechmaticsSTTSettings.HOT_FIELDS: + logger.debug(f"{self} applying hot settings update: {changed.keys()}") if self._config.enable_diarization: # Only hot-updatable fields changed — push to the live session. self._config.speaker_config.focus_speakers = self._settings.focus_speakers @@ -522,11 +524,17 @@ class SpeechmaticsSTTService(STTService): if self._client: self._client.update_diarization_config(self._config.speaker_config) else: - # Diarization not enabled — need a full reconnect to apply. - self._config = self._build_config() - await self._disconnect() - await self._connect() - # LOCAL_FIELDS: already applied by super(); nothing else to do. + logger.debug( + f"{self} hot settings updated but diarization not enabled: {changed.keys()}. ignoring." + ) + # Diarization not enabled — the new settings will take effect + # if/when diarization is enabled, which does require a reconnect. + elif changed.keys() & SpeechmaticsSTTSettings.LOCAL_FIELDS: + logger.debug( + f"{self} local settings update, no special action required: {changed.keys()}" + ) + # Only local fields changed — no need to push to the STT engine, + # the new settings will take effect immediately. 
return changed From a14690e3a0150e89e479125dcc25124b056f3910 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 11:55:14 -0500 Subject: [PATCH 025/189] Fix the 55i example --- .../55i-update-settings-whisper-api-stt.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/examples/foundational/55i-update-settings-whisper-api-stt.py b/examples/foundational/55i-update-settings-whisper-api-stt.py index 27581a819..1d5022674 100644 --- a/examples/foundational/55i-update-settings-whisper-api-stt.py +++ b/examples/foundational/55i-update-settings-whisper-api-stt.py @@ -24,7 +24,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.whisper.base_stt import BaseWhisperSTTService, BaseWhisperSTTSettings +from pipecat.services.openai.stt import OpenAISTTService +from pipecat.services.whisper.base_stt import BaseWhisperSTTSettings from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -50,8 +51,11 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = BaseWhisperSTTService( - model="whisper-1", + # This file is meant to exercise Whisper API-based STT services, so we use + # OpenAI's Whisper STT as an example here. 
Here we could've also used: + # - SambaNova + # - Groq + stt = OpenAISTTService( api_key=os.getenv("OPENAI_API_KEY"), ) @@ -103,14 +107,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info( - 'Updating Whisper API STT settings: prompt="Transcribe in English", temperature=0.5' - ) - await task.queue_frame( - STTUpdateSettingsFrame( - update=BaseWhisperSTTSettings(prompt="Transcribe in English", temperature=0.5) - ) - ) + logger.info('Updating OpenAI STT settings: language="es"') + await task.queue_frame(STTUpdateSettingsFrame(update=BaseWhisperSTTSettings(language="es"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): From b8b531b66a1b9646bbde37907f7d0534701fe498 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 12:37:34 -0500 Subject: [PATCH 026/189] In Cartesia TTS service, we don't need to override `_update_settings`. Parent class handling is enough, as new settings are picked up on the next `run_tts` (no need to reconnect). 
--- .../55n-update-settings-cartesia-tts.py | 10 +++++++--- src/pipecat/services/cartesia/tts.py | 19 ------------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/examples/foundational/55n-update-settings-cartesia-tts.py b/examples/foundational/55n-update-settings-cartesia-tts.py index 38070765c..303c23a25 100644 --- a/examples/foundational/55n-update-settings-cartesia-tts.py +++ b/examples/foundational/55n-update-settings-cartesia-tts.py @@ -22,7 +22,7 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings +from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings, GenerationConfig from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams @@ -103,8 +103,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Cartesia TTS settings: speed=fast") - await task.queue_frame(TTSUpdateSettingsFrame(update=CartesiaTTSSettings(speed="fast"))) + logger.info("Updating Cartesia TTS settings: speed increased to 1.5") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + ) + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index edee9e2ea..4e45f50aa 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -355,25 +355,6 @@ class CartesiaTTSService(AudioContextWordTTSService): """ return True - async def _update_settings(self, update: 
TTSSettings) -> dict[str, Any]: - """Apply a settings update. - - Settings are stored but not applied to the active connection. - """ - changed = await super()._update_settings(update) - - if not changed: - return changed - - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) - - return changed - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Cartesia language format. From caf5dacbe86ea10c5cb07ec230ba29c7356ed2a4 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 12:37:50 -0500 Subject: [PATCH 027/189] Update 55j example to avoid console warning --- examples/foundational/55j-update-settings-sarvam-stt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/foundational/55j-update-settings-sarvam-stt.py b/examples/foundational/55j-update-settings-sarvam-stt.py index b7f619987..e39c5cb5a 100644 --- a/examples/foundational/55j-update-settings-sarvam-stt.py +++ b/examples/foundational/55j-update-settings-sarvam-stt.py @@ -101,9 +101,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Sarvam STT settings: language=hi") + logger.info("Updating Sarvam STT settings: language=en-IN") await task.queue_frame( - STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.HI)) + STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.EN_IN)) ) @transport.event_handler("on_client_disconnected") From 17886d14e8d2d710b5631be8ed64df27bd5c922b Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 13:47:02 -0500 Subject: [PATCH 028/189] Fix `ElevenLabsTTSService` settings update code --- .../foundational/55o-update-settings-elevenlabs-tts.py | 7 +++---- 
src/pipecat/services/elevenlabs/tts.py | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py index 0ca72ba5b..6c85e2452 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -25,6 +25,7 @@ from pipecat.runner.utils import create_transport from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -100,10 +101,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating ElevenLabs TTS settings: speed=1.2, stability=0.3") - await task.queue_frame( - TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=1.2, stability=0.3)) - ) + logger.info("Updating ElevenLabs TTS settings: speed=1.2") + await task.queue_frame(TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=0.7))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 022b08b94..fbde4a4b7 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -492,19 +492,19 @@ class ElevenLabsTTSService(AudioContextWordTTSService): # Rebuild voice settings for next context self._voice_settings = self._set_voice_settings() - url_changed = bool(changed & ElevenLabsTTSSettings.URL_FIELDS) - 
voice_settings_changed = bool(changed & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS) + url_changed = bool(changed.keys() & ElevenLabsTTSSettings.URL_FIELDS) + voice_settings_changed = bool(changed.keys() & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS) if url_changed: logger.debug( - f"URL-level setting changed ({changed & ElevenLabsTTSSettings.URL_FIELDS}), " + f"URL-level setting changed ({changed.keys() & ElevenLabsTTSSettings.URL_FIELDS}), " f"reconnecting WebSocket" ) await self._disconnect() await self._connect() elif voice_settings_changed and self._context_id: logger.debug( - f"Voice settings changed ({changed & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS}), " + f"Voice settings changed ({changed.keys() & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS}), " f"closing current context to apply changes" ) try: From 28677ec829d043e4804589e7dbc631ff4927b253 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 13:49:32 -0500 Subject: [PATCH 029/189] Tweak 55p example to make the settings update more pronounced --- examples/foundational/55p-update-settings-openai-tts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/foundational/55p-update-settings-openai-tts.py b/examples/foundational/55p-update-settings-openai-tts.py index 58e8efde1..5aef081fc 100644 --- a/examples/foundational/55p-update-settings-openai-tts.py +++ b/examples/foundational/55p-update-settings-openai-tts.py @@ -98,8 +98,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating OpenAI TTS settings: speed=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=1.5))) + logger.info("Updating OpenAI TTS settings: speed=2.0") + await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=2.0))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): From 
728a97ade327c58fa276036ba53cbc61caf4b89c Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 14:11:51 -0500 Subject: [PATCH 030/189] Update Deepgram TTS to support updating settings dynamically --- .../55q-update-settings-deepgram-tts.py | 6 ++++++ src/pipecat/services/deepgram/tts.py | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py b/examples/foundational/55q-update-settings-deepgram-tts.py index 4b7f50ae8..636342194 100644 --- a/examples/foundational/55q-update-settings-deepgram-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -102,6 +102,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) ) + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-aries-en")) + ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index bccbace26..7f4d78f13 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -12,7 +12,7 @@ for generating speech from text using various voice models. import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -183,6 +183,23 @@ class DeepgramTTSService(WebsocketTTSService): await self._disconnect_websocket() + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + """Apply a settings update. + + Args: + update: A :class:`TTSSettings` (or ``DeepgramTTSSettings``) delta. 
+ + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(update) + + if changed: + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to Deepgram WebSocket API with configured settings.""" try: From b4c5cb258bb9687f47925857acb0a26a1892f16d Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 14:15:14 -0500 Subject: [PATCH 031/189] Tweak 55r example to make the settings update more pronounced --- examples/foundational/55r-update-settings-azure-tts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/foundational/55r-update-settings-azure-tts.py b/examples/foundational/55r-update-settings-azure-tts.py index 076901707..d156eab43 100644 --- a/examples/foundational/55r-update-settings-azure-tts.py +++ b/examples/foundational/55r-update-settings-azure-tts.py @@ -100,9 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info('Updating Azure TTS settings: rate="1.3", style="cheerful"') + logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') await task.queue_frame( - TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="1.3", style="cheerful")) + TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="0.7", style="sad")) ) @transport.event_handler("on_client_disconnected") From 416e1cf877f7e83dccbfa460bf2d00c27ca761d5 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 14:33:12 -0500 Subject: [PATCH 032/189] Update Rime TTS services to store voice in the standard `settings.voice` field, as opposed to the nonstandard `speaker` field --- .../55u-update-settings-rime-tts.py | 4 ++-- src/pipecat/services/rime/tts.py | 24 ++++++++----------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/examples/foundational/55u-update-settings-rime-tts.py 
b/examples/foundational/55u-update-settings-rime-tts.py index e95aeb830..25d0515fe 100644 --- a/examples/foundational/55u-update-settings-rime-tts.py +++ b/examples/foundational/55u-update-settings-rime-tts.py @@ -100,8 +100,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Rime TTS settings: speedAlpha=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(speedAlpha=1.5))) + logger.info("Updating Rime TTS settings: voice=rex") + await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(voice="rex"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 9e916025c..d76eafbfa 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -13,7 +13,7 @@ using Rime's API for streaming and batch audio synthesis. import base64 import json from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator, ClassVar, Dict, Optional import aiohttp from loguru import logger @@ -75,7 +75,6 @@ class RimeTTSSettings(TTSSettings): """Settings for Rime WS JSON and HTTP TTS services. Parameters: - speaker: Voice speaker ID. modelId: Rime model identifier. audioFormat: Audio output format. samplingRate: Audio sample rate. @@ -87,7 +86,6 @@ class RimeTTSSettings(TTSSettings): inlineSpeedAlpha: Inline speed control markup. 
""" - speaker: str = field(default_factory=lambda: NOT_GIVEN) modelId: str = field(default_factory=lambda: NOT_GIVEN) audioFormat: str = field(default_factory=lambda: NOT_GIVEN) samplingRate: int = field(default_factory=lambda: NOT_GIVEN) @@ -98,13 +96,14 @@ class RimeTTSSettings(TTSSettings): phonemizeBetweenBrackets: bool = field(default_factory=lambda: NOT_GIVEN) inlineSpeedAlpha: str = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + @dataclass class RimeNonJsonTTSSettings(TTSSettings): """Settings for Rime non-JSON WS TTS service. Parameters: - speaker: Voice speaker ID. modelId: Rime model identifier. audioFormat: Audio output format. samplingRate: Audio sample rate. @@ -115,7 +114,6 @@ class RimeNonJsonTTSSettings(TTSSettings): top_p: Cumulative probability threshold (0.0-1.0). """ - speaker: str = field(default_factory=lambda: NOT_GIVEN) modelId: str = field(default_factory=lambda: NOT_GIVEN) audioFormat: str = field(default_factory=lambda: NOT_GIVEN) samplingRate: int = field(default_factory=lambda: NOT_GIVEN) @@ -125,6 +123,8 @@ class RimeNonJsonTTSSettings(TTSSettings): temperature: float = field(default_factory=lambda: NOT_GIVEN) top_p: float = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + class RimeTTSService(AudioContextWordTTSService): """Text-to-Speech service using Rime's websocket API. 
@@ -210,7 +210,7 @@ class RimeTTSService(AudioContextWordTTSService): self._voice_id = voice_id self._model = model self._settings = RimeTTSSettings( - speaker=voice_id, + voice=voice_id, modelId=model, audioFormat="pcm", samplingRate=0, @@ -273,10 +273,8 @@ class RimeTTSService(AudioContextWordTTSService): async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and reconnect if voice changed.""" - prev_voice = self._voice_id changed = await super()._update_settings(update) if "voice" in changed: - self._settings.speaker = self._voice_id await self._disconnect() await self._connect() else: @@ -355,7 +353,7 @@ class RimeTTSService(AudioContextWordTTSService): params = "&".join( f"{k}={v}" for k, v in { - "speaker": self._settings.speaker, + "speaker": self._settings.voice, "modelId": self._settings.modelId, "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, @@ -772,7 +770,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): self._voice_id = voice_id self._model = model self._settings = RimeNonJsonTTSSettings( - speaker=voice_id, + voice=voice_id, modelId=model, audioFormat=audio_format, samplingRate=sample_rate, @@ -866,7 +864,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): return # Build URL with query parameters (only given, non-None values) settings_dict = { - "speaker": self._settings.speaker, + "speaker": self._settings.voice, "modelId": self._settings.modelId, "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, @@ -985,9 +983,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): """ changed = await super()._update_settings(update) - # Sync voice and model to settings dict fields - if "voice" in changed: - self._settings.speaker = self._voice_id + # Sync model to settings dict field if "model" in changed: self._settings.modelId = self._model_name From 0c73b7732717a850030fae2e6a947867328ba463 Mon Sep 17 00:00:00 2001 From: 
Paul Kompfner Date: Wed, 18 Feb 2026 14:47:38 -0500 Subject: [PATCH 033/189] Update Lmnt TTS to support updating settings dynamically --- .../55v-update-settings-lmnt-tts.py | 4 ++-- src/pipecat/services/lmnt/tts.py | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/examples/foundational/55v-update-settings-lmnt-tts.py b/examples/foundational/55v-update-settings-lmnt-tts.py index c8e6a3e09..d98462e20 100644 --- a/examples/foundational/55v-update-settings-lmnt-tts.py +++ b/examples/foundational/55v-update-settings-lmnt-tts.py @@ -100,8 +100,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info('Updating LMNT TTS settings: voice="lily"') - await task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="lily"))) + logger.info('Updating LMNT TTS settings: voice="tyler"') + await task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="tyler"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 1b23c8ae2..5b2adcaf4 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -8,7 +8,7 @@ import json from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -207,6 +207,23 @@ class LmntTTSService(InterruptibleTTSService): await self._disconnect_websocket() + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + """Apply a settings update. + + Args: + update: A :class:`TTSSettings` (or ``LmntTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. 
+ """ + changed = await super()._update_settings(update) + + if changed: + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to LMNT websocket.""" try: From 323ee00b831c3cf7e045dbebc628362f6c296da5 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 14:51:48 -0500 Subject: [PATCH 034/189] Fix 55w example --- examples/foundational/55w-update-settings-fish-tts.py | 5 ++++- src/pipecat/services/fish/tts.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/foundational/55w-update-settings-fish-tts.py b/examples/foundational/55w-update-settings-fish-tts.py index be9049333..82722ec34 100644 --- a/examples/foundational/55w-update-settings-fish-tts.py +++ b/examples/foundational/55w-update-settings-fish-tts.py @@ -52,7 +52,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = FishAudioTTSService(api_key=os.getenv("FISH_API_KEY")) + tts = FishAudioTTSService( + api_key=os.getenv("FISH_API_KEY"), + model="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama + ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 7dd06d705..09ed72099 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -208,9 +208,11 @@ class FishAudioTTSService(InterruptibleTTSService): Dict mapping changed field names to their previous values. 
""" changed = await super()._update_settings(update) + if changed: await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): From 0fa51811ea57c9368f62c8c34ae54c3cdcb88e52 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:10:36 -0500 Subject: [PATCH 035/189] Fix 55z example --- examples/foundational/55z-update-settings-hume-tts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/foundational/55z-update-settings-hume-tts.py b/examples/foundational/55z-update-settings-hume-tts.py index abab6abec..427b99bab 100644 --- a/examples/foundational/55z-update-settings-hume-tts.py +++ b/examples/foundational/55z-update-settings-hume-tts.py @@ -54,7 +54,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = HumeTTSService( api_key=os.getenv("HUME_API_KEY"), - voice_id="ee7ea9f8-c99a-4516-a65d-80235fa3acdc", + voice_id="f898a92e-685f-43fa-985b-a46920f0650b", ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -100,10 +100,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info('Updating Hume TTS settings: speed=1.5, description="Speak with excitement"') + logger.info('Updating Hume TTS settings: speed=2.0, description="Speak with excitement"') await task.queue_frame( TTSUpdateSettingsFrame( - update=HumeTTSSettings(speed=1.5, description="Speak with excitement") + update=HumeTTSSettings(speed=2.0, description="Speak with excitement") ) ) From b00d45484240eb4d21989d92cdbe5ced59b740f0 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:19:57 -0500 Subject: [PATCH 036/189] Fix Inworld TTS settings updating --- .../55zb-update-settings-inworld-tts.py | 4 +-- src/pipecat/services/inworld/tts.py | 34 ++++++++----------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git 
a/examples/foundational/55zb-update-settings-inworld-tts.py b/examples/foundational/55zb-update-settings-inworld-tts.py index d9947c196..f8a66bdd8 100644 --- a/examples/foundational/55zb-update-settings-inworld-tts.py +++ b/examples/foundational/55zb-update-settings-inworld-tts.py @@ -97,9 +97,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Inworld TTS settings: speaking_rate=1.3, temperature=0.8") + logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8") await task.queue_frame( - TTSUpdateSettingsFrame(update=InworldTTSSettings(speaking_rate=1.3, temperature=0.8)) + TTSUpdateSettingsFrame(update=InworldTTSSettings(speaking_rate=1.5, temperature=0.8)) ) @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index acc6187cb..fea30f3a1 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -184,25 +184,6 @@ class InworldHttpTTSService(WordTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. - - Settings are stored but not applied to the active connection. - """ - changed = await super()._update_settings(update) - - if not changed: - return changed - - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) - - return changed - async def start(self, frame: StartFrame): """Start the Inworld TTS service. @@ -756,6 +737,21 @@ class InworldTTSService(AudioContextWordTTSService): await self._disconnect_websocket() + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + """Apply a settings update. 
+ + Reconnects after any change so the new settings take effect. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to the Inworld WebSocket TTS service. From e38f7d945119afe983c608e0f82a945ef49dd01b Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:23:23 -0500 Subject: [PATCH 037/189] Fix 55zc example --- .../foundational/55zc-update-settings-gemini-tts.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/foundational/55zc-update-settings-gemini-tts.py b/examples/foundational/55zc-update-settings-gemini-tts.py index 0bf878871..6af28e69f 100644 --- a/examples/foundational/55zc-update-settings-gemini-tts.py +++ b/examples/foundational/55zc-update-settings-gemini-tts.py @@ -25,6 +25,7 @@ from pipecat.runner.utils import create_transport from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.tts import GeminiTTSService, GeminiTTSSettings from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -52,7 +53,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = GeminiTTSService(api_key=os.getenv("GOOGLE_API_KEY")) + tts = GeminiTTSService( + credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), + model="gemini-2.5-flash-tts", + voice_id="Charon", + params=GeminiTTSService.InputParams( + language=Language.EN_US, + prompt="You are a helpful AI assistant.
Speak in a natural, conversational tone.", + ), + ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) From b718a23c170266c822a27a7e55073a95906b0f03 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:25:50 -0500 Subject: [PATCH 038/189] Tweak 55zd example --- examples/foundational/55zd-update-settings-aws-polly-tts.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/foundational/55zd-update-settings-aws-polly-tts.py b/examples/foundational/55zd-update-settings-aws-polly-tts.py index 8abf68e2c..3d9f72cf4 100644 --- a/examples/foundational/55zd-update-settings-aws-polly-tts.py +++ b/examples/foundational/55zd-update-settings-aws-polly-tts.py @@ -97,10 +97,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info('Updating AWS Polly TTS settings: rate="fast", pitch="+10%"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast", pitch="+10%")) - ) + logger.info('Updating AWS Polly TTS settings: rate="fast"') + await task.queue_frame(TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): From d386a0efda8569ab26265d0e726f3cadfe1d10b3 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:31:08 -0500 Subject: [PATCH 039/189] Update Sarvam TTS to apply all changes to settings, not just voice --- .../foundational/55ze-update-settings-sarvam-tts.py | 6 +++--- src/pipecat/services/sarvam/tts.py | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/foundational/55ze-update-settings-sarvam-tts.py b/examples/foundational/55ze-update-settings-sarvam-tts.py index 07065bfaf..98408c4b8 100644 --- a/examples/foundational/55ze-update-settings-sarvam-tts.py +++ b/examples/foundational/55ze-update-settings-sarvam-tts.py @@ -24,7 
+24,7 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.sarvam.tts import SarvamTTSService, SarvamWSTTSSettings +from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -97,8 +97,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Sarvam TTS settings: pace=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamWSTTSSettings(pace=1.3))) + logger.info("Updating Sarvam TTS settings: pace=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamTTSSettings(pace=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 99a0827f5..aff96f1dd 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -276,7 +276,7 @@ class SarvamHttpTTSSettings(TTSSettings): @dataclass -class SarvamWSTTSSettings(TTSSettings): +class SarvamTTSSettings(TTSSettings): """Settings for Sarvam WebSocket TTS service. Parameters: @@ -686,7 +686,7 @@ class SarvamTTSService(InterruptibleTTSService): See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for API details. """ - _settings: SarvamWSTTSSettings + _settings: SarvamTTSSettings class InputParams(BaseModel): """Configuration parameters for Sarvam TTS WebSocket service. 
@@ -841,7 +841,7 @@ class SarvamTTSService(InterruptibleTTSService): pace = max(pace_min, min(pace_max, pace)) # Build base settings - self._settings = SarvamWSTTSSettings( + self._settings = SarvamTTSSettings( target_language_code=( self.language_to_service_language(params.language) if params.language else "en-IN" ), @@ -956,9 +956,10 @@ class SarvamTTSService(InterruptibleTTSService): async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a settings update and resend config if voice changed.""" changed = await super()._update_settings(update) - if "voice" in changed: + + if changed: await self._send_config() - self._warn_unhandled_updated_settings(changed.keys() - {"voice"}) + return changed async def _connect(self): From 88a2dbdb822167fb63ec4f9fb4e3d9a2f53bb1c9 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:48:50 -0500 Subject: [PATCH 040/189] Update 55zf example to update a setting that is supported by the default Camb TTS model --- examples/foundational/55zf-update-settings-camb-tts.py | 9 +++------ src/pipecat/services/camb/tts.py | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/examples/foundational/55zf-update-settings-camb-tts.py b/examples/foundational/55zf-update-settings-camb-tts.py index 0b663ef64..1fe758849 100644 --- a/examples/foundational/55zf-update-settings-camb-tts.py +++ b/examples/foundational/55zf-update-settings-camb-tts.py @@ -25,6 +25,7 @@ from pipecat.runner.utils import create_transport from pipecat.services.camb.tts import CambTTSService, CambTTSSettings from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -97,12 +98,8 @@ async def 
run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info('Updating Camb TTS settings: user_instructions="Speak enthusiastically"') - await task.queue_frame( - TTSUpdateSettingsFrame( - update=CambTTSSettings(user_instructions="Speak enthusiastically") - ) - ) + logger.info("Updating Camb TTS settings: language -> Spanish") + await task.queue_frame(TTSUpdateSettingsFrame(update=CambTTSSettings(language=Language.ES))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 4176b4413..95b0ddd52 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -17,7 +17,7 @@ Features: """ from dataclasses import dataclass, field -from typing import AsyncGenerator, Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional from camb import StreamTtsOutputConfiguration from camb.client import AsyncCambAI From c054780477c5b75aba21dd39b31f3e9f20849fd6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 15:59:34 -0500 Subject: [PATCH 041/189] Fix 55zh example --- .../foundational/55zh-update-settings-resembleai-tts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/foundational/55zh-update-settings-resembleai-tts.py b/examples/foundational/55zh-update-settings-resembleai-tts.py index 10d750394..39b745500 100644 --- a/examples/foundational/55zh-update-settings-resembleai-tts.py +++ b/examples/foundational/55zh-update-settings-resembleai-tts.py @@ -53,8 +53,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = ResembleAITTSService( - api_key=os.getenv("RESEMBLEAI_API_KEY"), - voice_id=os.getenv("RESEMBLEAI_VOICE_ID", ""), + api_key=os.getenv("RESEMBLE_API_KEY"), + 
voice_id=os.getenv("RESEMBLE_VOICE_UUID"), ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating ResembleAI TTS settings: voice (changed)") await task.queue_frame( TTSUpdateSettingsFrame( - update=ResembleAITTSSettings(voice=os.getenv("RESEMBLEAI_VOICE_ID_ALT", "")) + update=ResembleAITTSSettings(voice=os.getenv("RESEMBLE_VOICE_UUID_ALT")) ) ) From 97d34ef9e195f6474238a0741179e8f48af7c104 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 16:16:06 -0500 Subject: [PATCH 042/189] Update OpenAI Realtime to warn when you try to update settings that can't be updated dynamically. Update corresponding example to demonstrate updating output modality. --- .../55zl-update-settings-openai-realtime.py | 30 +++++++++++++++++-- src/pipecat/services/openai/realtime/llm.py | 7 +++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/examples/foundational/55zl-update-settings-openai-realtime.py b/examples/foundational/55zl-update-settings-openai-realtime.py index 2207b6851..90663a95c 100644 --- a/examples/foundational/55zl-update-settings-openai-realtime.py +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -16,13 +16,17 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.openai.realtime.llm import ( OpenAIRealtimeLLMService, OpenAIRealtimeLLMSettings, ) +from pipecat.services.openai_realtime import 
events from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -82,15 +86,35 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating OpenAI Realtime LLM settings: temperature=0.1") + logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['text']") await task.queue_frame( - LLMUpdateSettingsFrame(update=OpenAIRealtimeLLMSettings(temperature=0.1)) + LLMUpdateSettingsFrame( + update=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["text"]) + ) + ) + ) + + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['audio']") + await task.queue_frame( + LLMUpdateSettingsFrame( + update=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["audio"]) + ) + ) ) @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index f6e8b1646..3560f0c27 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import 
FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -99,7 +99,9 @@ class OpenAIRealtimeLLMSettings(LLMSettings): session_properties: OpenAI Realtime session configuration. """ - session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) class OpenAIRealtimeLLMService(LLMService): @@ -539,6 +541,7 @@ class OpenAIRealtimeLLMService(LLMService): changed = await super()._update_settings(update) if "session_properties" in changed: await self._send_session_update() + self._warn_unhandled_updated_settings(changed.keys() - {"session_properties"}) return changed async def _send_session_update(self): From ad942f6e4c81ed07996c822c03b97cb87857f9d6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 16:33:05 -0500 Subject: [PATCH 043/189] Update 55zn example (Ultravox dynamic settings updates) to exercise changing modality, which is a setting that supports dynamic updates --- .../55zn-update-settings-ultravox-realtime.py | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/examples/foundational/55zn-update-settings-ultravox-realtime.py b/examples/foundational/55zn-update-settings-ultravox-realtime.py index 8c640ccc4..967d40741 100644 --- a/examples/foundational/55zn-update-settings-ultravox-realtime.py +++ b/examples/foundational/55zn-update-settings-ultravox-realtime.py @@ -5,18 +5,23 @@ # import asyncio +import datetime import os from dotenv import load_dotenv from loguru import logger +from pipecat.adapters.schemas.tools_schema import ToolsSchema from 
pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.ultravox.llm import ( @@ -52,17 +57,22 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") + system_prompt = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way." + llm = UltravoxRealtimeLLMService( params=OneShotInputParams( api_key=os.getenv("ULTRAVOX_API_KEY"), - system_prompt="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + system_prompt=system_prompt, + temperature=0.3, + max_duration=datetime.timedelta(minutes=3), ), + one_shot_selected_tools=ToolsSchema(standard_tools=[]), ) messages = [ { "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + "content": system_prompt, }, ] @@ -88,15 +98,27 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Ultravox Realtime LLM settings: temperature=0.1") + logger.info("Updating Ultravox Realtime LLM settings: output_medium=text") await task.queue_frame( - LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(temperature=0.1)) + LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(output_medium="text")) + ) + + await asyncio.sleep(10) + logger.info("Updating Ultravox Realtime LLM settings: output_medium=voice") + await task.queue_frame( + LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(output_medium="voice")) ) @transport.event_handler("on_client_disconnected") From 2a07138abff910674c56ff18b5fd7afe1472be07 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 17:12:36 -0500 Subject: [PATCH 044/189] Fix Grok Realtime dynamic session properties updating, and update corresponding 55zo example --- .../55zl-update-settings-openai-realtime.py | 2 +- .../55zo-update-settings-grok-realtime.py | 22 +++++-- src/pipecat/services/grok/realtime/llm.py | 59 +++++++++++++------ 3 files changed, 59 insertions(+), 24 deletions(-) diff --git 
a/examples/foundational/55zl-update-settings-openai-realtime.py b/examples/foundational/55zl-update-settings-openai-realtime.py index 90663a95c..9c18d528e 100644 --- a/examples/foundational/55zl-update-settings-openai-realtime.py +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -22,11 +22,11 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.openai.realtime import events from pipecat.services.openai.realtime.llm import ( OpenAIRealtimeLLMService, OpenAIRealtimeLLMSettings, ) -from pipecat.services.openai_realtime import events from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams diff --git a/examples/foundational/55zo-update-settings-grok-realtime.py b/examples/foundational/55zo-update-settings-grok-realtime.py index 567eeae2b..7d7370f7b 100644 --- a/examples/foundational/55zo-update-settings-grok-realtime.py +++ b/examples/foundational/55zo-update-settings-grok-realtime.py @@ -16,9 +16,13 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.grok.realtime import events from pipecat.services.grok.realtime.llm import ( GrokRealtimeLLMService, GrokRealtimeLLMSettings, @@ -51,7 +55,7 @@ transport_params = { async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - llm = GrokRealtimeLLMService(api_key=os.getenv("XAI_API_KEY")) + llm = GrokRealtimeLLMService(api_key=os.getenv("GROK_API_KEY")) messages = [ { @@ -82,15 +86,25 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Grok Realtime LLM settings: temperature=0.1") + logger.info("Updating Grok Realtime LLM settings: voice='Rex'") await task.queue_frame( - LLMUpdateSettingsFrame(update=GrokRealtimeLLMSettings(temperature=0.1)) + LLMUpdateSettingsFrame( + update=GrokRealtimeLLMSettings( + session_properties=events.SessionProperties(voice="Rex") + ) + ) ) @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index f31769774..14c93c94a 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -56,7 +56,7 @@ from pipecat.processors.aggregators.llm_response_universal import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.time import 
time_now_iso8601 from . import events @@ -94,7 +94,9 @@ class GrokRealtimeLLMSettings(LLMSettings): session_properties: Grok Realtime session configuration. """ - session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) class GrokRealtimeLLMService(LLMService): @@ -294,6 +296,27 @@ class GrokRealtimeLLMService(LLMService): # Standard AIService frame handling # + def _ensure_audio_config(self, input_sample_rate: int, output_sample_rate: int): + """Ensure session_properties.audio has input and output configs. + + Fills in any missing audio configuration using the given sample rates. + + Args: + input_sample_rate: Sample rate for audio input (Hz). + output_sample_rate: Sample rate for audio output (Hz). + """ + props = self._settings.session_properties + if not props.audio: + props.audio = events.AudioConfiguration() + if not props.audio.input: + props.audio.input = events.AudioInput( + format=events.PCMAudioFormat(rate=input_sample_rate) + ) + if not props.audio.output: + props.audio.output = events.AudioOutput( + format=events.PCMAudioFormat(rate=output_sample_rate) + ) + async def start(self, frame: StartFrame): """Start the service and establish WebSocket connection. @@ -301,23 +324,7 @@ class GrokRealtimeLLMService(LLMService): frame: The start frame triggering service initialization. 
""" await super().start(frame) - - # Ensure audio configuration exists with both input and output - if not self._settings.session_properties.audio: - self._settings.session_properties.audio = events.AudioConfiguration() - - # Fill in missing input configuration - if not self._settings.session_properties.audio.input: - self._settings.session_properties.audio.input = events.AudioInput( - format=events.PCMAudioFormat(rate=frame.audio_in_sample_rate) - ) - - # Fill in missing output configuration - if not self._settings.session_properties.audio.output: - self._settings.session_properties.audio.output = events.AudioOutput( - format=events.PCMAudioFormat(rate=frame.audio_out_sample_rate) - ) - + self._ensure_audio_config(frame.audio_in_sample_rate, frame.audio_out_sample_rate) await self._connect() async def stop(self, frame: EndFrame): @@ -458,9 +465,23 @@ class GrokRealtimeLLMService(LLMService): async def _update_settings(self, update): """Apply a settings update, sending a session update if needed.""" + # Capture current sample rates before the update replaces them. + input_rate = self._get_configured_sample_rate("input") + output_rate = self._get_configured_sample_rate("output") + changed = await super()._update_settings(update) + if "session_properties" in changed: + if input_rate and output_rate: + self._ensure_audio_config(input_rate, output_rate) + else: + logger.warning( + "Attempting to apply session properties update without configured sample rates. " + "Audio configuration may be incomplete." 
+ ) await self._send_session_update() + + self._warn_unhandled_updated_settings(changed.keys() - {"session_properties"}) return changed async def _send_session_update(self): From a7edd8e441b630aeb3dafda11e70d4a14913b7bb Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Feb 2026 17:15:22 -0500 Subject: [PATCH 045/189] Fix 55zp example --- .../foundational/55zp-update-settings-aws-bedrock-llm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py index aaf0d973c..1c2781e72 100644 --- a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -57,7 +57,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = AWSBedrockLLMService(model="anthropic.claude-sonnet-4-20250514-v1:0") + llm = AWSBedrockLLMService( + aws_region="us-west-2", + model="us.anthropic.claude-haiku-4-5-20251001-v1:0", + params=AWSBedrockLLMService.InputParams(temperature=0.8), + ) messages = [ { From 421696e1c25d5b387e47b476f87d28889add7e06 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 11:28:29 -0500 Subject: [PATCH 046/189] Replace `Any` with specific types and add `| _NotGiven` to all `*Settings` field annotations across 49 service files Every `*Settings` dataclass field whose default is `NOT_GIVEN` now carries `_NotGiven` in its type union so the type system accurately reflects the three-state semantics (real value, `None` where applicable, or not-yet-specified). Fields previously typed as bare `Any`, `str`, `float`, `bool`, `list`, `dict`, or `Optional[X]` are now narrowed to the specific type from the corresponding `InputParams` Pydantic model. 
--- src/pipecat/services/anthropic/llm.py | 8 ++-- src/pipecat/services/assemblyai/stt.py | 6 ++- src/pipecat/services/asyncai/tts.py | 8 ++-- src/pipecat/services/aws/llm.py | 8 ++-- src/pipecat/services/aws/stt.py | 12 ++--- src/pipecat/services/aws/tts.py | 12 ++--- src/pipecat/services/azure/stt.py | 6 +-- src/pipecat/services/azure/tts.py | 18 +++---- src/pipecat/services/camb/tts.py | 4 +- src/pipecat/services/cartesia/stt.py | 4 +- src/pipecat/services/cartesia/tts.py | 16 +++---- src/pipecat/services/deepgram/stt.py | 4 +- .../services/deepgram/stt_sagemaker.py | 4 +- src/pipecat/services/deepgram/tts.py | 4 +- src/pipecat/services/elevenlabs/stt.py | 20 ++++---- src/pipecat/services/elevenlabs/tts.py | 38 ++++++++------- src/pipecat/services/fal/stt.py | 8 ++-- src/pipecat/services/fish/tts.py | 16 +++---- src/pipecat/services/gladia/stt.py | 4 +- .../services/google/gemini_live/llm.py | 20 ++++---- src/pipecat/services/google/llm.py | 6 ++- src/pipecat/services/google/stt.py | 26 +++++----- src/pipecat/services/google/tts.py | 40 +++++++++------- src/pipecat/services/gradium/stt.py | 4 +- src/pipecat/services/gradium/tts.py | 4 +- src/pipecat/services/groq/tts.py | 8 ++-- src/pipecat/services/hathora/stt.py | 4 +- src/pipecat/services/hathora/tts.py | 6 +-- src/pipecat/services/inworld/tts.py | 14 +++--- src/pipecat/services/kokoro/tts.py | 4 +- src/pipecat/services/lmnt/tts.py | 4 +- src/pipecat/services/minimax/tts.py | 26 +++++----- src/pipecat/services/neuphonic/tts.py | 10 ++-- src/pipecat/services/nvidia/stt.py | 12 ++--- src/pipecat/services/openai/base_llm.py | 6 +-- src/pipecat/services/openai/stt.py | 4 +- src/pipecat/services/openai/tts.py | 6 +-- .../services/openai_realtime_beta/openai.py | 8 ++-- src/pipecat/services/playht/tts.py | 12 ++--- src/pipecat/services/resembleai/tts.py | 8 ++-- src/pipecat/services/rime/tts.py | 36 +++++++------- src/pipecat/services/sarvam/stt.py | 10 ++-- src/pipecat/services/sarvam/tts.py | 40 ++++++++-------- 
src/pipecat/services/soniox/stt.py | 4 +- src/pipecat/services/speechmatics/stt.py | 48 ++++++++++--------- src/pipecat/services/ultravox/llm.py | 4 +- src/pipecat/services/whisper/base_stt.py | 8 ++-- src/pipecat/services/whisper/stt.py | 14 +++--- src/pipecat/services/xtts/tts.py | 4 +- 49 files changed, 314 insertions(+), 286 deletions(-) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 159b666d1..4416aa018 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN -from pipecat.services.settings import LLMSettings, is_given +from pipecat.services.settings import LLMSettings, _NotGiven, is_given from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -79,8 +79,10 @@ class AnthropicLLMSettings(LLMSettings): thinking: Extended thinking configuration. 
""" - enable_prompt_caching: Any = field(default_factory=lambda: _NOT_GIVEN) - thinking: Any = field(default_factory=lambda: _NOT_GIVEN) + enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + thinking: "AnthropicLLMService.ThinkingConfig" | _NotGiven = field( + default_factory=lambda: _NOT_GIVEN + ) @classmethod def from_mapping(cls, settings): diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 23b7d149b..6a33b6a20 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -30,7 +30,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import ASSEMBLYAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -64,7 +64,9 @@ class AssemblyAISTTSettings(STTSettings): connection_params: Connection configuration parameters. 
""" - connection_params: AssemblyAIConnectionParams = field(default_factory=lambda: NOT_GIVEN) + connection_params: AssemblyAIConnectionParams | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) class AssemblyAISTTService(WebsocketSTTService): diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 489d7cbff..d01fd4396 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -84,9 +84,9 @@ class AsyncAITTSSettings(TTSSettings): output_sample_rate: Audio sample rate in Hz. 
""" - output_container: str = field(default_factory=lambda: NOT_GIVEN) - output_encoding: str = field(default_factory=lambda: NOT_GIVEN) - output_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + output_container: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @classmethod def from_mapping(cls, settings: Mapping[str, Any]) -> "AsyncAITTSSettings": diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 3fca8e374..b39d518ec 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -56,7 +56,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -80,8 +80,10 @@ class AWSBedrockLLMSettings(LLMSettings): additional_model_request_fields: Additional model-specific parameters. 
""" - latency: Any = field(default_factory=lambda: NOT_GIVEN) - additional_model_request_fields: Any = field(default_factory=lambda: NOT_GIVEN) + latency: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + additional_model_request_fields: Dict[str, Any] | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) @dataclass diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index 21220e646..09552ecfc 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -29,7 +29,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.aws.utils import build_event_message, decode_event, get_presigned_url -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import AWS_TRANSCRIBE_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -57,11 +57,11 @@ class AWSTranscribeSTTSettings(STTSettings): enable_channel_identification: Whether to enable channel identification. 
""" - sample_rate: int = field(default_factory=lambda: NOT_GIVEN) - media_encoding: str = field(default_factory=lambda: NOT_GIVEN) - number_of_channels: int = field(default_factory=lambda: NOT_GIVEN) - show_speaker_label: bool = field(default_factory=lambda: NOT_GIVEN) - enable_channel_identification: bool = field(default_factory=lambda: NOT_GIVEN) + sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + media_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + number_of_channels: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + show_speaker_label: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_channel_identification: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class AWSTranscribeSTTService(WebsocketSTTService): diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 47c524196..e223a1abc 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -135,11 +135,11 @@ class AWSPollyTTSSettings(TTSSettings): lexicon_names: List of pronunciation lexicons to apply. 
""" - engine: str = field(default_factory=lambda: NOT_GIVEN) - pitch: str = field(default_factory=lambda: NOT_GIVEN) - rate: str = field(default_factory=lambda: NOT_GIVEN) - volume: str = field(default_factory=lambda: NOT_GIVEN) - lexicon_names: List[str] = field(default_factory=lambda: NOT_GIVEN) + engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + lexicon_names: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class AWSPollyTTSService(TTSService): diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 18fc9b108..d161b3829 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.azure.common import language_to_azure_language -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import AZURE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -59,8 +59,8 @@ class AzureSTTSettings(STTSettings): sample_rate: Audio sample rate in Hz. 
""" - region: str = field(default_factory=lambda: NOT_GIVEN) - sample_rate: Optional[int] = field(default_factory=lambda: NOT_GIVEN) + region: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sample_rate: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class AzureSTTService(STTService): diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index b72b33901..b69e60b69 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.azure.common import language_to_azure_language -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService, WordTTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -82,14 +82,14 @@ class AzureTTSSettings(TTSSettings): volume: Volume level (e.g., "+20%", "loud", "x-soft"). 
""" - emphasis: str = field(default_factory=lambda: NOT_GIVEN) - language: str = field(default_factory=lambda: NOT_GIVEN) - pitch: str = field(default_factory=lambda: NOT_GIVEN) - rate: str = field(default_factory=lambda: NOT_GIVEN) - role: str = field(default_factory=lambda: NOT_GIVEN) - style: str = field(default_factory=lambda: NOT_GIVEN) - style_degree: str = field(default_factory=lambda: NOT_GIVEN) - volume: str = field(default_factory=lambda: NOT_GIVEN) + emphasis: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + role: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style_degree: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class AzureBaseTTSService: diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 95b0ddd52..40dabd17e 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -32,7 +32,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -144,7 +144,7 @@ class CambTTSSettings(TTSSettings): Ignored for other models. Max 1000 characters. 
""" - user_instructions: str = field(default_factory=lambda: NOT_GIVEN) + user_instructions: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class CambTTSService(TTSService): diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 6629d05bb..e3270936b 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import CARTESIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -52,7 +52,7 @@ class CartesiaSTTSettings(STTSettings): encoding: Audio encoding format (e.g. ``"pcm_s16le"``). """ - encoding: str = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class CartesiaLiveOptions: diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 4e45f50aa..0d8936fdd 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import AudioContextWordTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -209,13 +209,13 @@ class CartesiaTTSSettings(TTSSettings): custom pronunciations. 
""" - output_container: str = field(default_factory=lambda: NOT_GIVEN) - output_encoding: str = field(default_factory=lambda: NOT_GIVEN) - output_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) - speed: str = field(default_factory=lambda: NOT_GIVEN) - emotion: List[str] = field(default_factory=lambda: NOT_GIVEN) - generation_config: GenerationConfig = field(default_factory=lambda: NOT_GIVEN) - pronunciation_dict_id: str = field(default_factory=lambda: NOT_GIVEN) + output_container: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: Literal["slow", "normal", "fast"] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emotion: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + generation_config: GenerationConfig | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pronunciation_dict_id: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @classmethod def from_mapping(cls, settings: Mapping[str, Any]) -> "CartesiaTTSSettings": diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index f52932b2c..8d4a72fc3 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -24,7 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import DEEPGRAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -55,7 +55,7 @@ class DeepgramSTTSettings(STTSettings): live_options: Deepgram ``LiveOptions`` for detailed configuration. 
""" - live_options: LiveOptions = field(default_factory=lambda: NOT_GIVEN) + live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class DeepgramSTTService(STTService): diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 870ded11f..3184bf7f8 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -32,7 +32,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -57,7 +57,7 @@ class DeepgramSageMakerSTTSettings(STTSettings): live_options: Deepgram ``LiveOptions`` for detailed configuration. 
""" - live_options: LiveOptions = field(default_factory=lambda: NOT_GIVEN) + live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class DeepgramSageMakerSTTService(STTService): diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 7f4d78f13..3458a4529 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -30,7 +30,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService, WebsocketTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -53,7 +53,7 @@ class DeepgramTTSSettings(TTSSettings): encoding: Audio encoding format (linear16, mulaw, alaw). """ - encoding: str = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class DeepgramTTSService(WebsocketTTSService): diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index e5b7b3843..c3e6b29e7 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -35,7 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import ELEVENLABS_REALTIME_TTFS_P99, ELEVENLABS_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -185,7 +185,7 @@ class ElevenLabsSTTSettings(STTSettings): tag_audio_events: Whether to include audio event tags in transcription. 
""" - tag_audio_events: bool = field(default_factory=lambda: NOT_GIVEN) + tag_audio_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @dataclass @@ -205,14 +205,14 @@ class ElevenLabsRealtimeSTTSettings(STTSettings): include_language_detection: Whether to include language detection in transcripts. """ - commit_strategy: CommitStrategy = field(default_factory=lambda: NOT_GIVEN) - vad_silence_threshold_secs: float = field(default_factory=lambda: NOT_GIVEN) - vad_threshold: float = field(default_factory=lambda: NOT_GIVEN) - min_speech_duration_ms: int = field(default_factory=lambda: NOT_GIVEN) - min_silence_duration_ms: int = field(default_factory=lambda: NOT_GIVEN) - include_timestamps: bool = field(default_factory=lambda: NOT_GIVEN) - enable_logging: bool = field(default_factory=lambda: NOT_GIVEN) - include_language_detection: bool = field(default_factory=lambda: NOT_GIVEN) + commit_strategy: CommitStrategy | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_silence_threshold_secs: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_speech_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_silence_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_timestamps: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_language_detection: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class ElevenLabsSTTService(SegmentedSTTService): diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index fbde4a4b7..9503866a7 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -44,7 +44,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import 
FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import ( AudioContextWordTTSService, WordTTSService, @@ -206,15 +206,17 @@ class ElevenLabsTTSSettings(TTSSettings): apply_text_normalization: Text normalization mode ("auto", "on", "off"). """ - stability: float = field(default_factory=lambda: NOT_GIVEN) - similarity_boost: float = field(default_factory=lambda: NOT_GIVEN) - style: float = field(default_factory=lambda: NOT_GIVEN) - use_speaker_boost: bool = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - auto_mode: str = field(default_factory=lambda: NOT_GIVEN) - enable_ssml_parsing: bool = field(default_factory=lambda: NOT_GIVEN) - enable_logging: bool = field(default_factory=lambda: NOT_GIVEN) - apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + stability: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + auto_mode: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_ssml_parsing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: Literal["auto", "on", "off"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) #: Fields in the WS URL — changing any of these requires a reconnect. 
URL_FIELDS: ClassVar[frozenset[str]] = frozenset({"voice", "model", "language"}) @@ -242,13 +244,15 @@ class ElevenLabsHttpTTSSettings(TTSSettings): apply_text_normalization: Text normalization mode ("auto", "on", "off"). """ - optimize_streaming_latency: int = field(default_factory=lambda: NOT_GIVEN) - stability: float = field(default_factory=lambda: NOT_GIVEN) - similarity_boost: float = field(default_factory=lambda: NOT_GIVEN) - style: float = field(default_factory=lambda: NOT_GIVEN) - use_speaker_boost: bool = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + optimize_streaming_latency: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + stability: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: Literal["auto", "on", "off"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index a29d8d70d..bcfc583c6 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -18,7 +18,7 @@ from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import FAL_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from 
pipecat.transcriptions.language import Language, resolve_language @@ -159,9 +159,9 @@ class FalSTTSettings(STTSettings): version: Version of Wizper model to use. Defaults to '3'. """ - task: str = field(default_factory=lambda: NOT_GIVEN) - chunk_level: str = field(default_factory=lambda: NOT_GIVEN) - version: str = field(default_factory=lambda: NOT_GIVEN) + task: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + chunk_level: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + version: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class FalSTTService(SegmentedSTTService): diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 09ed72099..131495769 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -29,7 +29,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -61,13 +61,13 @@ class FishAudioTTSSettings(TTSSettings): reference_id: Reference ID of the voice model. 
""" - fish_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) - latency: str = field(default_factory=lambda: NOT_GIVEN) - format: str = field(default_factory=lambda: NOT_GIVEN) - normalize: bool = field(default_factory=lambda: NOT_GIVEN) - prosody_speed: float = field(default_factory=lambda: NOT_GIVEN) - prosody_volume: int = field(default_factory=lambda: NOT_GIVEN) - reference_id: str = field(default_factory=lambda: NOT_GIVEN) + fish_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + latency: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + normalize: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prosody_speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prosody_volume: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "fish_sample_rate"} diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 25922e7aa..c92d9469f 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( UserStoppedSpeakingFrame, ) from pipecat.services.gladia.config import GladiaInputParams -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GLADIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -188,7 +188,7 @@ class GladiaSTTSettings(STTSettings): input_params: Gladia ``GladiaInputParams`` for detailed configuration. 
""" - input_params: GladiaInputParams = field(default_factory=lambda: NOT_GIVEN) + input_params: GladiaInputParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GladiaSTTService(WebsocketSTTService): diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 3047e258d..00b540385 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -76,7 +76,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.string import match_endofsentence from pipecat.utils.time import time_now_iso8601 @@ -617,14 +617,16 @@ class GeminiLiveLLMSettings(LLMSettings): proactivity: Proactivity configuration. """ - modalities: Any = field(default_factory=lambda: NOT_GIVEN) - language: Any = field(default_factory=lambda: NOT_GIVEN) - media_resolution: Any = field(default_factory=lambda: NOT_GIVEN) - vad: Any = field(default_factory=lambda: NOT_GIVEN) - context_window_compression: Any = field(default_factory=lambda: NOT_GIVEN) - thinking: Any = field(default_factory=lambda: NOT_GIVEN) - enable_affective_dialog: Any = field(default_factory=lambda: NOT_GIVEN) - proactivity: Any = field(default_factory=lambda: NOT_GIVEN) + modalities: GeminiModalities | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + media_resolution: GeminiMediaResolution | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad: GeminiVADParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + context_window_compression: ContextWindowCompressionParams | dict | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + 
thinking: ThinkingConfig | dict | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_affective_dialog: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + proactivity: ProactivityConfig | dict | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GeminiLiveLLMService(LLMService): diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index bf1958f66..0a097b770 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -58,7 +58,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) -from pipecat.services.settings import NOT_GIVEN, LLMSettings, is_given +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven, is_given from pipecat.utils.tracing.service_decorators import traced_llm # Suppress gRPC fork warnings @@ -681,7 +681,9 @@ class GoogleLLMSettings(LLMSettings): thinking: Thinking configuration. """ - thinking: Any = field(default_factory=lambda: NOT_GIVEN) + thinking: "GoogleLLMService.ThinkingConfig" | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) @classmethod def from_mapping(cls, settings): diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index cdd583c8e..72d4f12b6 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -36,7 +36,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GOOGLE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -383,17 +383,19 @@ class GoogleSTTSettings(STTSettings): enable_voice_activity_events: Detect voice activity in audio. 
""" - languages: Any = field(default_factory=lambda: NOT_GIVEN) - language_codes: Any = field(default_factory=lambda: NOT_GIVEN) - use_separate_recognition_per_channel: Any = field(default_factory=lambda: NOT_GIVEN) - enable_automatic_punctuation: Any = field(default_factory=lambda: NOT_GIVEN) - enable_spoken_punctuation: Any = field(default_factory=lambda: NOT_GIVEN) - enable_spoken_emojis: Any = field(default_factory=lambda: NOT_GIVEN) - profanity_filter: Any = field(default_factory=lambda: NOT_GIVEN) - enable_word_time_offsets: Any = field(default_factory=lambda: NOT_GIVEN) - enable_word_confidence: Any = field(default_factory=lambda: NOT_GIVEN) - enable_interim_results: Any = field(default_factory=lambda: NOT_GIVEN) - enable_voice_activity_events: Any = field(default_factory=lambda: NOT_GIVEN) + languages: List[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_codes: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_separate_recognition_per_channel: bool | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + enable_automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_emojis: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_word_time_offsets: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_word_confidence: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_interim_results: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_voice_activity_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GoogleSTTService(STTService): diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index e47aa384a..60bed9c6d 100644 --- a/src/pipecat/services/google/tts.py +++ 
b/src/pipecat/services/google/tts.py @@ -24,7 +24,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, List, Literal, Optional +from typing import Any, AsyncGenerator, Dict, List, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -37,7 +37,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language @@ -493,14 +493,20 @@ class GoogleHttpTTSSettings(TTSSettings): google_style: Google-specific voice style. """ - pitch: str = field(default_factory=lambda: NOT_GIVEN) - rate: str = field(default_factory=lambda: NOT_GIVEN) - speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) - volume: str = field(default_factory=lambda: NOT_GIVEN) - emphasis: str = field(default_factory=lambda: NOT_GIVEN) - language: str = field(default_factory=lambda: NOT_GIVEN) - gender: str = field(default_factory=lambda: NOT_GIVEN) - google_style: str = field(default_factory=lambda: NOT_GIVEN) + pitch: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emphasis: Literal["strong", "moderate", "reduced", "none"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + gender: Literal["male", "female", "neutral"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + 
google_style: ( + Literal["apologetic", "calm", "empathetic", "firm", "lively"] | None | _NotGiven + ) = field(default_factory=lambda: NOT_GIVEN) @dataclass @@ -512,8 +518,8 @@ class GoogleStreamTTSSettings(TTSSettings): speaking_rate: The speaking rate, in the range [0.25, 2.0]. """ - language: str = field(default_factory=lambda: NOT_GIVEN) - speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @dataclass @@ -527,10 +533,12 @@ class GeminiTTSSettings(TTSSettings): speaker_configs: List of speaker configurations for multi-speaker mode. """ - language: str = field(default_factory=lambda: NOT_GIVEN) - prompt: str = field(default_factory=lambda: NOT_GIVEN) - multi_speaker: bool = field(default_factory=lambda: NOT_GIVEN) - speaker_configs: List[dict] = field(default_factory=lambda: NOT_GIVEN) + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + multi_speaker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_configs: list[dict[str, Any]] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) class GoogleHttpTTSService(TTSService): diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 381f76884..1583fac3c 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from 
pipecat.transcriptions.language import Language, resolve_language @@ -75,7 +75,7 @@ class GradiumSTTSettings(STTSettings): generated. Higher delays allow more context but increase latency. """ - delay_in_frames: int = field(default_factory=lambda: NOT_GIVEN) + delay_in_frames: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GradiumSTTService(WebsocketSTTService): diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 3bffbb5bf..c41c77436 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -23,7 +23,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleWordTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -47,7 +47,7 @@ class GradiumTTSSettings(TTSSettings): output_format: Audio output format. """ - output_format: str = field(default_factory=lambda: NOT_GIVEN) + output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GradiumTTSService(InterruptibleWordTTSService): diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index e4c10f2e9..b3b4c5f57 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -21,7 +21,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -44,9 +44,9 @@ class GroqTTSSettings(TTSSettings): groq_sample_rate: Audio sample rate. 
""" - output_format: str = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - groq_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + groq_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "groq_sample_rate"} diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index a620ed79a..e77e382c0 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -19,7 +19,7 @@ from pipecat.frames.frames import ( Frame, TranscriptionFrame, ) -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import HATHORA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language @@ -39,7 +39,7 @@ class HathoraSTTSettings(STTSettings): what is supported. 
""" - config: Optional[list] = field(default_factory=lambda: NOT_GIVEN) + config: list[ConfigOption] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class HathoraSTTService(SegmentedSTTService): diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index f3524734a..e15dfcc54 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -22,7 +22,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -58,8 +58,8 @@ class HathoraTTSSettings(TTSSettings): what is supported. """ - speed: float = field(default_factory=lambda: NOT_GIVEN) - config: list = field(default_factory=lambda: NOT_GIVEN) + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + config: list[ConfigOption] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class HathoraTTSService(TTSService): diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index fea30f3a1..bdbbb82d7 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -24,7 +24,7 @@ import websockets from loguru import logger from pydantic import BaseModel -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given try: from websockets.asyncio.client import connect as websocket_connect @@ -67,12 +67,12 @@ class InworldTTSSettings(TTSSettings): apply_text_normalization: Whether to apply text normalization. 
""" - audio_encoding: str = field(default_factory=lambda: NOT_GIVEN) - audio_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) - speaking_rate: float = field(default_factory=lambda: NOT_GIVEN) - temperature: float = field(default_factory=lambda: NOT_GIVEN) - auto_mode: bool = field(default_factory=lambda: NOT_GIVEN) - apply_text_normalization: str = field(default_factory=lambda: NOT_GIVEN) + audio_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + auto_mode: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = { "voice_id": "voice", diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index b88511437..735145da7 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -23,7 +23,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -97,7 +97,7 @@ class KokoroTTSSettings(TTSSettings): lang_code: Kokoro language code for synthesis. 
""" - lang_code: str = field(default_factory=lambda: NOT_GIVEN) + lang_code: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class KokoroTTSService(TTSService): diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 5b2adcaf4..94f4a1a9e 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -24,7 +24,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -81,7 +81,7 @@ class LmntTTSSettings(TTSSettings): format: Audio output format. Defaults to "raw". """ - format: str = field(default_factory=lambda: NOT_GIVEN) + format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class LmntTTSService(InterruptibleTTSService): diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 6a107d950..290439704 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -107,18 +107,18 @@ class MiniMaxTTSSettings(TTSSettings): language_boost: Language boost string for multilingual support. 
""" - stream: bool = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - volume: float = field(default_factory=lambda: NOT_GIVEN) - pitch: int = field(default_factory=lambda: NOT_GIVEN) - emotion: str = field(default_factory=lambda: NOT_GIVEN) - text_normalization: bool = field(default_factory=lambda: NOT_GIVEN) - latex_read: bool = field(default_factory=lambda: NOT_GIVEN) - audio_bitrate: int = field(default_factory=lambda: NOT_GIVEN) - audio_format: str = field(default_factory=lambda: NOT_GIVEN) - audio_channel: int = field(default_factory=lambda: NOT_GIVEN) - audio_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) - language_boost: str = field(default_factory=lambda: NOT_GIVEN) + stream: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emotion: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + text_normalization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + latex_read: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_bitrate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_channel: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_boost: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 0797f9b1b..2e51297ab 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -35,7 +35,7 @@ from 
pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -85,10 +85,10 @@ class NeuphonicTTSSettings(TTSSettings): sampling_rate: Audio sample rate. """ - lang_code: str = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - encoding: str = field(default_factory=lambda: NOT_GIVEN) - sampling_rate: int = field(default_factory=lambda: NOT_GIVEN) + lang_code: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sampling_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class NeuphonicTTSService(InterruptibleTTSService): diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 8e1babec7..a79119c34 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -23,7 +23,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import NVIDIA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, STTService from pipecat.transcriptions.language import Language, resolve_language @@ -110,11 +110,11 @@ class NvidiaSegmentedSTTSettings(STTSettings): boosted_lm_score: Score boost for specified words. 
""" - profanity_filter: bool = field(default_factory=lambda: NOT_GIVEN) - automatic_punctuation: bool = field(default_factory=lambda: NOT_GIVEN) - verbatim_transcripts: bool = field(default_factory=lambda: NOT_GIVEN) - boosted_lm_words: Optional[List[str]] = field(default_factory=lambda: NOT_GIVEN) - boosted_lm_score: float = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + verbatim_transcripts: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_words: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_score: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class NvidiaSTTService(STTService): diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 13cbc07cb..5b624010f 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -43,7 +43,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN -from pipecat.services.settings import LLMSettings +from pipecat.services.settings import LLMSettings, _NotGiven from pipecat.utils.tracing.service_decorators import traced_llm @@ -56,8 +56,8 @@ class OpenAILLMSettings(LLMSettings): service_tier: Service tier to use (e.g., "auto", "flex", "priority"). 
""" - max_completion_tokens: Any = field(default_factory=lambda: _NOT_GIVEN) - service_tier: Any = field(default_factory=lambda: _NOT_GIVEN) + max_completion_tokens: int | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + service_tier: str | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) class BaseOpenAILLMService(LLMService): diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 6daefd1da..82ad8c0f0 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -35,7 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription @@ -133,7 +133,7 @@ class OpenAIRealtimeSTTSettings(STTSettings): prompt: Optional prompt text to guide transcription style. 
""" - prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) + prompt: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class OpenAIRealtimeSTTService(WebsocketSTTService): diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index f283a7912..2253e369a 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -71,8 +71,8 @@ class OpenAITTSSettings(TTSSettings): speed: Voice speed control (0.25 to 4.0, default 1.0). """ - instructions: str = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) + instructions: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class OpenAITTSService(TTSService): diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index b456ed0b8..ef40dcb15 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -11,7 +11,7 @@ import json import time import warnings from dataclasses import dataclass, field -from typing import Any, Optional +from typing import Optional from loguru import logger @@ -54,7 +54,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.openai.llm import OpenAIContextAggregatorPair -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import 
NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -100,7 +100,9 @@ class OpenAIRealtimeBetaLLMSettings(LLMSettings): session_properties: OpenAI Realtime session configuration. """ - session_properties: Any = field(default_factory=lambda: NOT_GIVEN) + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) class OpenAIRealtimeBetaLLMService(LLMService): diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index 1965c9ea3..b5c683fbe 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -111,11 +111,11 @@ class PlayHTTTSSettings(TTSSettings): playht_sample_rate: Audio sample rate sent to the API. 
""" - output_format: str = field(default_factory=lambda: NOT_GIVEN) - voice_engine: str = field(default_factory=lambda: NOT_GIVEN) - speed: float = field(default_factory=lambda: NOT_GIVEN) - seed: int = field(default_factory=lambda: NOT_GIVEN) - playht_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + voice_engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + playht_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class PlayHTTTSService(InterruptibleTTSService): diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index acba883e4..f2873a8a1 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextWordTTSService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -50,9 +50,9 @@ class ResembleAITTSSettings(TTSSettings): resemble_sample_rate: Audio sample rate sent to the API. 
""" - precision: str = field(default_factory=lambda: NOT_GIVEN) - output_format: str = field(default_factory=lambda: NOT_GIVEN) - resemble_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + precision: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_format: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + resemble_sample_rate: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = { "voice_id": "voice", diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index d76eafbfa..87596cefd 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -31,7 +31,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import ( AudioContextWordTTSService, InterruptibleTTSService, @@ -86,15 +86,15 @@ class RimeTTSSettings(TTSSettings): inlineSpeedAlpha: Inline speed control markup. 
""" - modelId: str = field(default_factory=lambda: NOT_GIVEN) - audioFormat: str = field(default_factory=lambda: NOT_GIVEN) - samplingRate: int = field(default_factory=lambda: NOT_GIVEN) - lang: str = field(default_factory=lambda: NOT_GIVEN) - speedAlpha: float = field(default_factory=lambda: NOT_GIVEN) - reduceLatency: bool = field(default_factory=lambda: NOT_GIVEN) - pauseBetweenBrackets: bool = field(default_factory=lambda: NOT_GIVEN) - phonemizeBetweenBrackets: bool = field(default_factory=lambda: NOT_GIVEN) - inlineSpeedAlpha: str = field(default_factory=lambda: NOT_GIVEN) + modelId: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + lang: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speedAlpha: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + reduceLatency: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pauseBetweenBrackets: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + phonemizeBetweenBrackets: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + inlineSpeedAlpha: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} @@ -114,14 +114,14 @@ class RimeNonJsonTTSSettings(TTSSettings): top_p: Cumulative probability threshold (0.0-1.0). 
""" - modelId: str = field(default_factory=lambda: NOT_GIVEN) - audioFormat: str = field(default_factory=lambda: NOT_GIVEN) - samplingRate: int = field(default_factory=lambda: NOT_GIVEN) - lang: str = field(default_factory=lambda: NOT_GIVEN) - segment: str = field(default_factory=lambda: NOT_GIVEN) - repetition_penalty: float = field(default_factory=lambda: NOT_GIVEN) - temperature: float = field(default_factory=lambda: NOT_GIVEN) - top_p: float = field(default_factory=lambda: NOT_GIVEN) + modelId: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + lang: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + segment: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + repetition_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 80e6d6ca2..aa6baef14 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -32,7 +32,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import SARVAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -142,10 +142,10 @@ class SarvamSTTSettings(STTSettings): high_vad_sensitivity: Enable high VAD sensitivity. 
""" - prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) - mode: Optional[str] = field(default_factory=lambda: NOT_GIVEN) - vad_signals: Optional[bool] = field(default_factory=lambda: NOT_GIVEN) - high_vad_sensitivity: Optional[bool] = field(default_factory=lambda: NOT_GIVEN) + prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + mode: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_signals: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + high_vad_sensitivity: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class SarvamSTTService(STTService): diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index aff96f1dd..332643fc9 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -62,7 +62,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers -from pipecat.services.settings import NOT_GIVEN, TTSSettings, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -266,13 +266,13 @@ class SarvamHttpTTSSettings(TTSSettings): sample_rate: Audio sample rate. 
""" - language: str = field(default_factory=lambda: NOT_GIVEN) - enable_preprocessing: bool = field(default_factory=lambda: NOT_GIVEN) - pace: float = field(default_factory=lambda: NOT_GIVEN) - pitch: float = field(default_factory=lambda: NOT_GIVEN) - loudness: float = field(default_factory=lambda: NOT_GIVEN) - temperature: float = field(default_factory=lambda: NOT_GIVEN) - sarvam_sample_rate: int = field(default_factory=lambda: NOT_GIVEN) + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pace: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + loudness: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sarvam_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @dataclass @@ -305,18 +305,18 @@ class SarvamTTSSettings(TTSSettings): **Note:** Only supported for bulbul:v3-beta. Ignored for v2. 
""" - target_language_code: str = field(default_factory=lambda: NOT_GIVEN) - speaker: str = field(default_factory=lambda: NOT_GIVEN) - speech_sample_rate: str = field(default_factory=lambda: NOT_GIVEN) - enable_preprocessing: bool = field(default_factory=lambda: NOT_GIVEN) - min_buffer_size: int = field(default_factory=lambda: NOT_GIVEN) - max_chunk_length: int = field(default_factory=lambda: NOT_GIVEN) - output_audio_codec: str = field(default_factory=lambda: NOT_GIVEN) - output_audio_bitrate: str = field(default_factory=lambda: NOT_GIVEN) - pace: float = field(default_factory=lambda: NOT_GIVEN) - pitch: float = field(default_factory=lambda: NOT_GIVEN) - loudness: float = field(default_factory=lambda: NOT_GIVEN) - temperature: float = field(default_factory=lambda: NOT_GIVEN) + target_language_code: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speech_sample_rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_chunk_length: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_audio_codec: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_audio_bitrate: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pace: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + loudness: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class SarvamHttpTTSService(TTSService): diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 5c4b49cbe..1f34c061c 100644 --- a/src/pipecat/services/soniox/stt.py +++ 
b/src/pipecat/services/soniox/stt.py @@ -24,7 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import SONIOX_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -144,7 +144,7 @@ class SonioxSTTSettings(STTSettings): input_params: Soniox ``SonioxInputParams`` for detailed configuration. """ - input_params: SonioxInputParams = field(default_factory=lambda: NOT_GIVEN) + input_params: SonioxInputParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class SonioxSTTService(WebsocketSTTService): diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 166e19d97..2e23765b2 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import SPEECHMATICS_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -115,28 +115,30 @@ class SpeechmaticsSTTSettings(STTSettings): extra_params: Extra parameters for the STT engine. 
""" - domain: str = field(default_factory=lambda: NOT_GIVEN) - turn_detection_mode: TurnDetectionMode = field(default_factory=lambda: NOT_GIVEN) - speaker_active_format: str = field(default_factory=lambda: NOT_GIVEN) - speaker_passive_format: str = field(default_factory=lambda: NOT_GIVEN) - focus_speakers: list = field(default_factory=lambda: NOT_GIVEN) - ignore_speakers: list = field(default_factory=lambda: NOT_GIVEN) - focus_mode: Any = field(default_factory=lambda: NOT_GIVEN) - known_speakers: list = field(default_factory=lambda: NOT_GIVEN) - additional_vocab: list = field(default_factory=lambda: NOT_GIVEN) - audio_encoding: Any = field(default_factory=lambda: NOT_GIVEN) - operating_point: Any = field(default_factory=lambda: NOT_GIVEN) - max_delay: float = field(default_factory=lambda: NOT_GIVEN) - end_of_utterance_silence_trigger: float = field(default_factory=lambda: NOT_GIVEN) - end_of_utterance_max_delay: float = field(default_factory=lambda: NOT_GIVEN) - punctuation_overrides: dict = field(default_factory=lambda: NOT_GIVEN) - include_partials: bool = field(default_factory=lambda: NOT_GIVEN) - split_sentences: bool = field(default_factory=lambda: NOT_GIVEN) - enable_diarization: bool = field(default_factory=lambda: NOT_GIVEN) - speaker_sensitivity: float = field(default_factory=lambda: NOT_GIVEN) - max_speakers: int = field(default_factory=lambda: NOT_GIVEN) - prefer_current_speaker: bool = field(default_factory=lambda: NOT_GIVEN) - extra_params: dict = field(default_factory=lambda: NOT_GIVEN) + domain: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + turn_detection_mode: TurnDetectionMode | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_active_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_passive_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + focus_speakers: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + ignore_speakers: list[str] | _NotGiven = 
field(default_factory=lambda: NOT_GIVEN) + focus_mode: SpeakerFocusMode | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + known_speakers: list[SpeakerIdentifier] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + additional_vocab: list[AdditionalVocabEntry] | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + audio_encoding: AudioEncoding | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + operating_point: OperatingPoint | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_delay: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_silence_trigger: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_max_delay: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + punctuation_overrides: dict[str, Any] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_partials: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + split_sentences: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_diarization: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_sensitivity: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_speakers: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prefer_current_speaker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + extra_params: dict[str, Any] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) #: Fields that can be updated on a live connection via the Speechmatics #: diarization-config API — no reconnect needed. 
diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index ef8baacb4..436653c7e 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -56,7 +56,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.settings import NOT_GIVEN, LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.time import time_now_iso8601 try: @@ -75,7 +75,7 @@ class UltravoxRealtimeLLMSettings(LLMSettings): output_medium: The output medium for the model ("voice" or "text"). """ - output_medium: str = field(default=NOT_GIVEN) + output_medium: str | _NotGiven = field(default=NOT_GIVEN) class AgentInputParams(BaseModel): diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index d50c24eb2..74ca2d102 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -18,7 +18,7 @@ from openai import AsyncOpenAI from openai.types.audio import Transcription from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import WHISPER_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -37,9 +37,9 @@ class BaseWhisperSTTSettings(STTSettings): temperature: Sampling temperature between 0 and 1. 
""" - base_url: Optional[str] = field(default_factory=lambda: NOT_GIVEN) - prompt: Optional[str] = field(default_factory=lambda: NOT_GIVEN) - temperature: Optional[float] = field(default_factory=lambda: NOT_GIVEN) + base_url: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prompt: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) def language_to_whisper_language(language: Language) -> Optional[str]: diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index d4efcb166..033d815d9 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -20,7 +20,7 @@ from loguru import logger from typing_extensions import TYPE_CHECKING, override from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame -from pipecat.services.settings import NOT_GIVEN, STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.time import time_now_iso8601 @@ -184,9 +184,9 @@ class WhisperSTTSettings(STTSettings): no_speech_prob: Probability threshold for filtering non-speech segments. """ - device: str = field(default_factory=lambda: NOT_GIVEN) - compute_type: str = field(default_factory=lambda: NOT_GIVEN) - no_speech_prob: float = field(default_factory=lambda: NOT_GIVEN) + device: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + compute_type: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + no_speech_prob: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @dataclass @@ -199,9 +199,9 @@ class WhisperMLXSTTSettings(STTSettings): engine: Whisper engine identifier. 
""" - no_speech_prob: float = field(default_factory=lambda: NOT_GIVEN) - temperature: float = field(default_factory=lambda: NOT_GIVEN) - engine: str = field(default_factory=lambda: NOT_GIVEN) + no_speech_prob: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class WhisperSTTService(SegmentedSTTService): diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 3ba332138..65aa25e36 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -78,7 +78,7 @@ class XTTSTTSSettings(TTSSettings): base_url: Base URL of the XTTS streaming server. 
""" - base_url: str = field(default_factory=lambda: NOT_GIVEN) + base_url: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class XTTSService(TTSService): From cc54ff4708ee69476ab0fdb160d2a3b1556d72c3 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 14:42:08 -0500 Subject: [PATCH 047/189] Add more 55-series examples --- .../55n-update-settings-cartesia-http-tts.py | 133 +++++++++++++++++ ...55o-update-settings-elevenlabs-http-tts.py | 132 +++++++++++++++++ .../55o-update-settings-elevenlabs-tts.py | 2 +- .../55q-update-settings-deepgram-http-tts.py | 137 ++++++++++++++++++ .../55q-update-settings-deepgram-tts.py | 12 +- .../55r-update-settings-azure-http-tts.py | 127 ++++++++++++++++ 6 files changed, 536 insertions(+), 7 deletions(-) create mode 100644 examples/foundational/55n-update-settings-cartesia-http-tts.py create mode 100644 examples/foundational/55o-update-settings-elevenlabs-http-tts.py create mode 100644 examples/foundational/55q-update-settings-deepgram-http-tts.py create mode 100644 examples/foundational/55r-update-settings-azure-http-tts.py diff --git a/examples/foundational/55n-update-settings-cartesia-http-tts.py b/examples/foundational/55n-update-settings-cartesia-http-tts.py new file mode 100644 index 000000000..27cee5b8f --- /dev/null +++ b/examples/foundational/55n-update-settings-cartesia-http-tts.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import 
( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import ( + CartesiaHttpTTSService, + CartesiaTTSSettings, + GenerationConfig, +) +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaHttpTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Cartesia HTTP TTS settings: speed increased to 1.5") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55o-update-settings-elevenlabs-http-tts.py b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py new file mode 100644 index 000000000..a67202702 --- /dev/null +++ b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2024-2026, 
Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService, ElevenLabsHttpTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = ElevenLabsHttpTTSService( + api_key=os.getenv("ELEVENLABS_API_KEY"), + voice_id=os.getenv("ELEVENLABS_VOICE_ID"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + 
"role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: speed=0.7") + await task.queue_frame( + TTSUpdateSettingsFrame(update=ElevenLabsHttpTTSSettings(speed=0.7)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py 
b/examples/foundational/55o-update-settings-elevenlabs-tts.py index 6c85e2452..4186f07ae 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating ElevenLabs TTS settings: speed=1.2") + logger.info("Updating ElevenLabs TTS settings: speed=0.7") await task.queue_frame(TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=0.7))) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55q-update-settings-deepgram-http-tts.py b/examples/foundational/55q-update-settings-deepgram-http-tts.py new file mode 100644 index 000000000..64bbea587 --- /dev/null +++ b/examples/foundational/55q-update-settings-deepgram-http-tts.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.tts import DeepgramHttpTTSService, DeepgramTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, 
TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = DeepgramHttpTTSService( + api_key=os.getenv("DEEPGRAM_API_KEY"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-aries-en")) + ) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py b/examples/foundational/55q-update-settings-deepgram-tts.py index 
636342194..9d94a50da 100644 --- a/examples/foundational/55q-update-settings-deepgram-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -96,18 +96,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) - await asyncio.sleep(10) - logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') - await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) - ) - await asyncio.sleep(10) logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') await task.queue_frame( TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-aries-en")) ) + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/examples/foundational/55r-update-settings-azure-http-tts.py b/examples/foundational/55r-update-settings-azure-http-tts.py new file mode 100644 index 000000000..3132580ed --- /dev/null +++ b/examples/foundational/55r-update-settings-azure-http-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from 
pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.tts import AzureHttpTTSService, AzureTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AzureHttpTTSService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="0.7", style="sad")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() From ebb42a3c6d39fc6d9abdc305868c5dd0c855eeb2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 15:06:48 -0500 Subject: [PATCH 048/189] Fix forward reference crash in Google and Anthropic LLM ThinkingConfig ThinkingConfig was defined as an inner class on the service but referenced in the Settings dataclass declared before the service class, causing 
a crash at import time. Move ThinkingConfig to a standalone class defined before Settings, and keep a class attribute alias for backward compatibility. --- src/pipecat/services/anthropic/llm.py | 50 +++++++++--------- src/pipecat/services/google/llm.py | 76 ++++++++++++++------------- 2 files changed, 64 insertions(+), 62 deletions(-) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 4416aa018..68ebf7ab1 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -70,6 +70,25 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +class AnthropicThinkingConfig(BaseModel): + """Configuration for extended thinking. + + Parameters: + type: Type of thinking mode (currently only "enabled" or "disabled"). + budget_tokens: Maximum number of tokens for thinking. + With today's models, the minimum is 1024. + Only allowed if type is "enabled". + """ + + # Why `| str` here? To not break compatibility in case Anthropic adds + # more types in the future. + type: Literal["enabled", "disabled"] | str + + # Why not enforce minimum of 1024 here? To not break compatibility in + # case Anthropic changes this requirement in the future. + budget_tokens: int + + @dataclass class AnthropicLLMSettings(LLMSettings): """Settings for Anthropic LLM services. @@ -80,20 +99,18 @@ class AnthropicLLMSettings(LLMSettings): """ enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) - thinking: "AnthropicLLMService.ThinkingConfig" | _NotGiven = field( - default_factory=lambda: _NOT_GIVEN - ) + thinking: AnthropicThinkingConfig | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) @classmethod def from_mapping(cls, settings): """Convert a plain dict to settings, coercing thinking dicts. For backward compatibility, a ``thinking`` value that is a plain dict - is converted to a :class:`AnthropicLLMService.ThinkingConfig`. 
+ is converted to a :class:`AnthropicThinkingConfig`. """ instance = super().from_mapping(settings) if is_given(instance.thinking) and isinstance(instance.thinking, dict): - instance.thinking = AnthropicLLMService.ThinkingConfig(**instance.thinking) + instance.thinking = AnthropicThinkingConfig(**instance.thinking) return instance @@ -148,23 +165,8 @@ class AnthropicLLMService(LLMService): # Overriding the default adapter to use the Anthropic one. adapter_class = AnthropicLLMAdapter - class ThinkingConfig(BaseModel): - """Configuration for extended thinking. - - Parameters: - type: Type of thinking mode (currently only "enabled" or "disabled"). - budget_tokens: Maximum number of tokens for thinking. - With today's models, the minimum is 1024. - Only allowed if type is "enabled". - """ - - # Why `| str` here? To not break compatibility in case Anthropic adds - # more types in the future. - type: Literal["enabled", "disabled"] | str - - # Why not enforce minimnum of 1024 here? To not break compatibility in - # case Anthropic changes this requirement in the future. - budget_tokens: int + # Backward compatibility: ThinkingConfig used to be defined inline here. + ThinkingConfig = AnthropicThinkingConfig class InputParams(BaseModel): """Input parameters for Anthropic model inference. 
@@ -193,9 +195,7 @@ class AnthropicLLMService(LLMService): temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0) top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) - thinking: Optional["AnthropicLLMService.ThinkingConfig"] = Field( - default_factory=lambda: NOT_GIVEN - ) + thinking: Optional[AnthropicThinkingConfig] = Field(default_factory=lambda: NOT_GIVEN) extra: Optional[Dict[str, Any]] = Field(default_factory=dict) def model_post_init(self, __context): diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 0a097b770..f5a6db78c 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -673,6 +673,39 @@ class GoogleLLMContext(OpenAILLMContext): self._messages = [m for m in self._messages if m.parts] +class GoogleThinkingConfig(BaseModel): + """Configuration for controlling the model's internal "thinking" process used before generating a response. + + Gemini 2.5 and 3 series models have this thinking process. + + Parameters: + thinking_level: Thinking level for Gemini 3 models. + For Gemini 3 Pro, this can be "low" or "high". + For Gemini 3 Flash, this can be "minimal", "low", "medium", or "high". + If not provided, Gemini 3 models default to "high". + Note: Gemini 2.5 series must use thinking_budget instead. + thinking_budget: Token budget for thinking, for Gemini 2.5 series. + -1 for dynamic thinking (model decides), 0 to disable thinking, + or a specific token count (e.g., 128-32768 for 2.5 Pro). + If not provided, most models today default to dynamic thinking. + See https://ai.google.dev/gemini-api/docs/thinking#set-budget + for default values and allowed ranges. + Note: Gemini 3 models must use thinking_level instead. + include_thoughts: Whether to include thought summaries in the response. + Today's models default to not including thoughts (False). 
+ """ + + thinking_budget: Optional[int] = Field(default=None) + + # Why `| str` here? To not break compatibility in case Google adds more + # levels in the future. + thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field( + default=None + ) + + include_thoughts: Optional[bool] = Field(default=None) + + @dataclass class GoogleLLMSettings(LLMSettings): """Settings for Google LLM services. @@ -681,20 +714,18 @@ class GoogleLLMSettings(LLMSettings): thinking: Thinking configuration. """ - thinking: "GoogleLLMService.ThinkingConfig" | _NotGiven = field( - default_factory=lambda: NOT_GIVEN - ) + thinking: GoogleThinkingConfig | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @classmethod def from_mapping(cls, settings): """Convert a plain dict to settings, coercing thinking dicts. For backward compatibility, a ``thinking`` value that is a plain dict - is converted to a :class:`GoogleLLMService.ThinkingConfig`. + is converted to a :class:`GoogleThinkingConfig`. """ instance = super().from_mapping(settings) if is_given(instance.thinking) and isinstance(instance.thinking, dict): - instance.thinking = GoogleLLMService.ThinkingConfig(**instance.thinking) + instance.thinking = GoogleThinkingConfig(**instance.thinking) return instance @@ -711,37 +742,8 @@ class GoogleLLMService(LLMService): # Overriding the default adapter to use the Gemini one. adapter_class = GeminiLLMAdapter - class ThinkingConfig(BaseModel): - """Configuration for controlling the model's internal "thinking" process used before generating a response. - - Gemini 2.5 and 3 series models have this thinking process. - - Parameters: - thinking_level: Thinking level for Gemini 3 models. - For Gemini 3 Pro, this can be "low" or "high". - For Gemini 3 Flash, this can be "minimal", "low", "medium", or "high". - If not provided, Gemini 3 models default to "high". - Note: Gemini 2.5 series must use thinking_budget instead. 
- thinking_budget: Token budget for thinking, for Gemini 2.5 series. - -1 for dynamic thinking (model decides), 0 to disable thinking, - or a specific token count (e.g., 128-32768 for 2.5 Pro). - If not provided, most models today default to dynamic thinking. - See https://ai.google.dev/gemini-api/docs/thinking#set-budget - for default values and allowed ranges. - Note: Gemini 3 models must use thinking_level instead. - include_thoughts: Whether to include thought summaries in the response. - Today's models default to not including thoughts (False). - """ - - thinking_budget: Optional[int] = Field(default=None) - - # Why `| str` here? To not break compatibility in case Google adds more - # levels in the future. - thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field( - default=None - ) - - include_thoughts: Optional[bool] = Field(default=None) + # Backward compatibility: ThinkingConfig used to be defined inline here. + ThinkingConfig = GoogleThinkingConfig class InputParams(BaseModel): """Input parameters for Google AI models. 
@@ -764,7 +766,7 @@ class GoogleLLMService(LLMService): temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) top_k: Optional[int] = Field(default=None, ge=0) top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - thinking: Optional["GoogleLLMService.ThinkingConfig"] = Field(default=None) + thinking: Optional[GoogleThinkingConfig] = Field(default=None) extra: Optional[Dict[str, Any]] = Field(default_factory=dict) def __init__( From 6c609031ee8d6fc3262801d659ad6b700c76e817 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 15:12:39 -0500 Subject: [PATCH 049/189] Add more 55-series examples Also: - remove unnecessary pass-through `_update_settings` implementation in `FalSTTService` - warn that `AsyncAITTSService` doesn't currently support runtime settings updates - update how `GradiumTTSService._update_settings` checks for voice changes - remove a couple of unnecessary args (because they specified defaults) in other examples --- .../14n-function-calling-perplexity.py | 2 +- .../14s-function-calling-sambanova.py | 5 +- .../55s-update-settings-google-stream-tts.py | 124 ++++++++++++++++ .../55u-update-settings-rime-http-tts.py | 128 ++++++++++++++++ .../55zb-update-settings-inworld-http-tts.py | 130 ++++++++++++++++ .../55ze-update-settings-sarvam-http-tts.py | 126 ++++++++++++++++ .../55zi-update-settings-azure-llm.py | 130 ++++++++++++++++ .../55zk-update-settings-google-vertex-llm.py | 130 ++++++++++++++++ .../55zl-update-settings-azure-realtime.py | 140 ++++++++++++++++++ ...55zm-update-settings-gemini-live-vertex.py | 117 +++++++++++++++ .../55zq-update-settings-fal-stt.py | 125 ++++++++++++++++ .../55zr-update-settings-gradium-stt.py | 128 ++++++++++++++++ .../55zs-update-settings-hathora-stt.py | 129 ++++++++++++++++ ...zt-update-settings-nvidia-segmented-stt.py | 127 ++++++++++++++++ .../55zt-update-settings-nvidia-stt.py | 128 ++++++++++++++++ ...5zu-update-settings-openai-realtime-stt.py | 128 ++++++++++++++++ 
.../55zv-update-settings-asyncai-http-tts.py | 133 +++++++++++++++++ .../55zv-update-settings-asyncai-tts.py | 128 ++++++++++++++++ .../55zw-update-settings-gradium-tts.py | 128 ++++++++++++++++ .../55zx-update-settings-cerebras-llm.py | 126 ++++++++++++++++ .../55zy-update-settings-deepseek-llm.py | 126 ++++++++++++++++ .../55zz-update-settings-fireworks-llm.py | 129 ++++++++++++++++ .../55zza-update-settings-grok-llm.py | 126 ++++++++++++++++ .../55zzb-update-settings-groq-llm.py | 128 ++++++++++++++++ .../55zzc-update-settings-mistral-llm.py | 126 ++++++++++++++++ .../55zzd-update-settings-nvidia-llm.py | 128 ++++++++++++++++ .../55zze-update-settings-ollama-llm.py | 126 ++++++++++++++++ .../55zzf-update-settings-openrouter-llm.py | 126 ++++++++++++++++ .../55zzg-update-settings-perplexity-llm.py | 125 ++++++++++++++++ .../55zzh-update-settings-qwen-llm.py | 126 ++++++++++++++++ .../55zzi-update-settings-sambanova-llm.py | 126 ++++++++++++++++ .../55zzj-update-settings-together-llm.py | 129 ++++++++++++++++ src/pipecat/services/asyncai/tts.py | 14 ++ src/pipecat/services/fal/stt.py | 5 - src/pipecat/services/gradium/tts.py | 3 +- 35 files changed, 3843 insertions(+), 12 deletions(-) create mode 100644 examples/foundational/55s-update-settings-google-stream-tts.py create mode 100644 examples/foundational/55u-update-settings-rime-http-tts.py create mode 100644 examples/foundational/55zb-update-settings-inworld-http-tts.py create mode 100644 examples/foundational/55ze-update-settings-sarvam-http-tts.py create mode 100644 examples/foundational/55zi-update-settings-azure-llm.py create mode 100644 examples/foundational/55zk-update-settings-google-vertex-llm.py create mode 100644 examples/foundational/55zl-update-settings-azure-realtime.py create mode 100644 examples/foundational/55zm-update-settings-gemini-live-vertex.py create mode 100644 examples/foundational/55zq-update-settings-fal-stt.py create mode 100644 examples/foundational/55zr-update-settings-gradium-stt.py 
create mode 100644 examples/foundational/55zs-update-settings-hathora-stt.py create mode 100644 examples/foundational/55zt-update-settings-nvidia-segmented-stt.py create mode 100644 examples/foundational/55zt-update-settings-nvidia-stt.py create mode 100644 examples/foundational/55zu-update-settings-openai-realtime-stt.py create mode 100644 examples/foundational/55zv-update-settings-asyncai-http-tts.py create mode 100644 examples/foundational/55zv-update-settings-asyncai-tts.py create mode 100644 examples/foundational/55zw-update-settings-gradium-tts.py create mode 100644 examples/foundational/55zx-update-settings-cerebras-llm.py create mode 100644 examples/foundational/55zy-update-settings-deepseek-llm.py create mode 100644 examples/foundational/55zz-update-settings-fireworks-llm.py create mode 100644 examples/foundational/55zza-update-settings-grok-llm.py create mode 100644 examples/foundational/55zzb-update-settings-groq-llm.py create mode 100644 examples/foundational/55zzc-update-settings-mistral-llm.py create mode 100644 examples/foundational/55zzd-update-settings-nvidia-llm.py create mode 100644 examples/foundational/55zze-update-settings-ollama-llm.py create mode 100644 examples/foundational/55zzf-update-settings-openrouter-llm.py create mode 100644 examples/foundational/55zzg-update-settings-perplexity-llm.py create mode 100644 examples/foundational/55zzh-update-settings-qwen-llm.py create mode 100644 examples/foundational/55zzi-update-settings-sambanova-llm.py create mode 100644 examples/foundational/55zzj-update-settings-together-llm.py diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py index 40041aa34..2f1a18d52 100644 --- a/examples/foundational/14n-function-calling-perplexity.py +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): 
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"), model="sonar") + llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY")) messages = [ { diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index 79c43a473..76eb390c0 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -70,10 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = SambaNovaLLMService( - api_key=os.getenv("SAMBANOVA_API_KEY"), - model="Llama-4-Maverick-17B-128E-Instruct", - ) + llm = SambaNovaLLMService(api_key=os.getenv("SAMBANOVA_API_KEY")) # You can also register a function_name of None to get all functions # sent to the same callback with an additional function_name parameter. 
llm.register_function("get_current_weather", fetch_weather_from_api) diff --git a/examples/foundational/55s-update-settings-google-stream-tts.py b/examples/foundational/55s-update-settings-google-stream-tts.py new file mode 100644 index 000000000..42e07c64b --- /dev/null +++ b/examples/foundational/55s-update-settings-google-stream-tts.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.tts import GoogleStreamTTSSettings, GoogleTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt 
= DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GoogleTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google Stream TTS settings: speaking_rate=1.4") + await task.queue_frame( + TTSUpdateSettingsFrame(update=GoogleStreamTTSSettings(speaking_rate=1.4)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + 
await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55u-update-settings-rime-http-tts.py b/examples/foundational/55u-update-settings-rime-http-tts.py new file mode 100644 index 000000000..7b1c9b0fe --- /dev/null +++ b/examples/foundational/55u-update-settings-rime-http-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.rime.tts import RimeHttpTTSService, RimeTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: 
RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = RimeHttpTTSService( + api_key=os.getenv("RIME_API_KEY"), voice_id="eva", aiohttp_session=session + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Rime TTS settings: voice=rex") + await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(voice="rex"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point 
compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zb-update-settings-inworld-http-tts.py b/examples/foundational/55zb-update-settings-inworld-http-tts.py new file mode 100644 index 000000000..933a27013 --- /dev/null +++ b/examples/foundational/55zb-update-settings-inworld-http-tts.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.inworld.tts import InworldHttpTTSService, InworldTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = InworldHttpTTSService(api_key=os.getenv("INWORLD_API_KEY"), aiohttp_session=session) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=InworldTTSSettings(speaking_rate=1.5, temperature=0.8) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + 
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55ze-update-settings-sarvam-http-tts.py b/examples/foundational/55ze-update-settings-sarvam-http-tts.py new file mode 100644 index 000000000..0afce361a --- /dev/null +++ b/examples/foundational/55ze-update-settings-sarvam-http-tts.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.sarvam.tts import SarvamHttpTTSService, SarvamHttpTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = SarvamHttpTTSService(api_key=os.getenv("SARVAM_API_KEY"), aiohttp_session=session) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Sarvam TTS settings: pace=1.5") + await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamHttpTTSSettings(pace=1.5))) + + @transport.event_handler("on_client_disconnected") + async def 
on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zi-update-settings-azure-llm.py b/examples/foundational/55zi-update-settings-azure-llm.py new file mode 100644 index 000000000..94cb723e3 --- /dev/null +++ b/examples/foundational/55zi-update-settings-azure-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.llm import AzureLLMService +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + 
+load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = AzureLLMService( + api_key=os.getenv("AZURE_CHATGPT_API_KEY"), + endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), + model=os.getenv("AZURE_CHATGPT_MODEL"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Azure LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zk-update-settings-google-vertex-llm.py b/examples/foundational/55zk-update-settings-google-vertex-llm.py new file mode 100644 index 000000000..41c0b8a37 --- /dev/null +++ b/examples/foundational/55zk-update-settings-google-vertex-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + 
+import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.llm import GoogleLLMSettings +from pipecat.services.google.llm_vertex import GoogleVertexLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GoogleVertexLLMService( + credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"), + project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"), + location=os.getenv("GOOGLE_CLOUD_LOCATION"), + 
) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google Vertex LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zl-update-settings-azure-realtime.py 
b/examples/foundational/55zl-update-settings-azure-realtime.py new file mode 100644 index 000000000..b8f049db0 --- /dev/null +++ b/examples/foundational/55zl-update-settings-azure-realtime.py @@ -0,0 +1,140 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService +from pipecat.services.openai.realtime import events +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = AzureRealtimeLLMService( + 
api_key=os.getenv("AZURE_REALTIME_API_KEY"), + base_url=os.getenv("AZURE_REALTIME_BASE_URL"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Azure Realtime LLM settings: output_modalities=['text']") + await task.queue_frame( + LLMUpdateSettingsFrame( + update=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["text"]) + ) + ) + ) + + await asyncio.sleep(10) + logger.info("Updating Azure Realtime LLM settings: output_modalities=['audio']") + await task.queue_frame( + LLMUpdateSettingsFrame( + update=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["audio"]) + ) + ) + ) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zm-update-settings-gemini-live-vertex.py b/examples/foundational/55zm-update-settings-gemini-live-vertex.py new file mode 100644 index 000000000..575fbe090 --- /dev/null +++ b/examples/foundational/55zm-update-settings-gemini-live-vertex.py @@ -0,0 +1,117 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.google.gemini_live.llm import GeminiLiveLLMSettings +from pipecat.services.google.gemini_live.llm_vertex import GeminiLiveVertexLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GeminiLiveVertexLLMService( + credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"), + project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"), + location=os.getenv("GOOGLE_CLOUD_LOCATION"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gemini Live Vertex LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zq-update-settings-fal-stt.py b/examples/foundational/55zq-update-settings-fal-stt.py new file mode 100644 index 000000000..9792961f2 --- /dev/null +++ b/examples/foundational/55zq-update-settings-fal-stt.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, 
STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.fal.stt import FalSTTService, FalSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = FalSTTService(api_key=os.getenv("FAL_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Fal STT settings: task="translate"') + await task.queue_frame(STTUpdateSettingsFrame(update=FalSTTSettings(task="translate"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zr-update-settings-gradium-stt.py b/examples/foundational/55zr-update-settings-gradium-stt.py new file mode 100644 index 000000000..6a1a25c3c --- /dev/null +++ b/examples/foundational/55zr-update-settings-gradium-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import 
os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.gradium.stt import GradiumSTTService, GradiumSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GradiumSTTService( + api_key=os.getenv("GRADIUM_API_KEY"), + api_endpoint_base_url="wss://us.api.gradium.ai/api/speech/asr", + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gradium STT settings: delay_in_frames=5") + await task.queue_frame(STTUpdateSettingsFrame(update=GradiumSTTSettings(delay_in_frames=5))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zs-update-settings-hathora-stt.py b/examples/foundational/55zs-update-settings-hathora-stt.py new file mode 100644 index 000000000..f3aca9c89 --- /dev/null 
+++ b/examples/foundational/55zs-update-settings-hathora-stt.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.hathora.stt import HathoraSTTService, HathoraSTTSettings +from pipecat.services.hathora.utils import ConfigOption +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = HathoraSTTService(api_key=os.getenv("HATHORA_API_KEY"), model="deepgram-nova3") + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + 
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Hathora STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=HathoraSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + 
from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py new file mode 100644 index 000000000..624da149e --- /dev/null +++ b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.nvidia.stt import NvidiaSegmentedSTTService, NvidiaSegmentedSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + 
stt = NvidiaSegmentedSTTService(api_key=os.getenv("NVIDIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA Segmented STT settings: profanity_filter=True") + await task.queue_frame( + STTUpdateSettingsFrame(update=NvidiaSegmentedSTTSettings(profanity_filter=True)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point 
compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zt-update-settings-nvidia-stt.py b/examples/foundational/55zt-update-settings-nvidia-stt.py new file mode 100644 index 000000000..0e7b6a74a --- /dev/null +++ b/examples/foundational/55zt-update-settings-nvidia-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.nvidia.stt import NvidiaSTTService, NvidiaSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: 
TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = NvidiaSTTService(api_key=os.getenv("NVIDIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=NvidiaSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = 
PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zu-update-settings-openai-realtime-stt.py b/examples/foundational/55zu-update-settings-openai-realtime-stt.py new file mode 100644 index 000000000..1f1592df7 --- /dev/null +++ b/examples/foundational/55zu-update-settings-openai-realtime-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.openai.stt import OpenAIRealtimeSTTService, OpenAIRealtimeSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": 
lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = OpenAIRealtimeSTTService(api_key=os.getenv("OPENAI_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime STT settings: language=es") + await task.queue_frame( + 
STTUpdateSettingsFrame(update=OpenAIRealtimeSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zv-update-settings-asyncai-http-tts.py b/examples/foundational/55zv-update-settings-asyncai-http-tts.py new file mode 100644 index 000000000..206a80eed --- /dev/null +++ b/examples/foundational/55zv-update-settings-asyncai-http-tts.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.asyncai.tts import AsyncAIHttpTTSService, AsyncAITTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from 
pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AsyncAIHttpTTSService( + api_key=os.getenv("ASYNCAI_API_KEY", ""), + voice_id=os.getenv("ASYNCAI_VOICE_ID", "e0f39dc4-f691-4e78-bba5-5c636692cc04"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AsyncAI HTTP TTS settings: language=es") + await task.queue_frame( + TTSUpdateSettingsFrame(update=AsyncAITTSSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zv-update-settings-asyncai-tts.py b/examples/foundational/55zv-update-settings-asyncai-tts.py new file mode 100644 index 000000000..f910e5fe3 --- /dev/null +++ b/examples/foundational/55zv-update-settings-asyncai-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + 
+import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.asyncai.tts import AsyncAITTSService, AsyncAITTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AsyncAITTSService( + api_key=os.getenv("ASYNCAI_API_KEY", ""), + voice_id=os.getenv("ASYNCAI_VOICE_ID", "e0f39dc4-f691-4e78-bba5-5c636692cc04"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AsyncAI TTS settings: language=es") + await task.queue_frame( + TTSUpdateSettingsFrame(update=AsyncAITTSSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zw-update-settings-gradium-tts.py b/examples/foundational/55zw-update-settings-gradium-tts.py new file mode 100644 index 000000000..39090d5fa --- 
/dev/null +++ b/examples/foundational/55zw-update-settings-gradium-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.gradium.tts import GradiumTTSService, GradiumTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GradiumTTSService( + api_key=os.getenv("GRADIUM_API_KEY"), + voice_id="YTpq7expH9539ERJ", + url="wss://us.api.gradium.ai/api/speech/tts", + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Gradium TTS settings: voice="LFZvm12tW_z0xfGo"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=GradiumTTSSettings(voice="LFZvm12tW_z0xfGo")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55zx-update-settings-cerebras-llm.py b/examples/foundational/55zx-update-settings-cerebras-llm.py new file mode 100644 index 000000000..72aa8518d --- /dev/null +++ b/examples/foundational/55zx-update-settings-cerebras-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.cerebras.llm import CerebrasLLMService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = CerebrasLLMService(api_key=os.getenv("CEREBRAS_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Cerebras LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = 
await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zy-update-settings-deepseek-llm.py b/examples/foundational/55zy-update-settings-deepseek-llm.py new file mode 100644 index 000000000..de4e4149e --- /dev/null +++ b/examples/foundational/55zy-update-settings-deepseek-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepseek.llm import DeepSeekLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = DeepSeekLLMService(api_key=os.getenv("DEEPSEEK_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating DeepSeek LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) 
+ + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zz-update-settings-fireworks-llm.py b/examples/foundational/55zz-update-settings-fireworks-llm.py new file mode 100644 index 000000000..d864cacb2 --- /dev/null +++ b/examples/foundational/55zz-update-settings-fireworks-llm.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.fireworks.llm import FireworksLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": 
lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = FireworksLLMService( + api_key=os.getenv("FIREWORKS_API_KEY"), + model="accounts/fireworks/models/gpt-oss-20b", + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Fireworks LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zza-update-settings-grok-llm.py b/examples/foundational/55zza-update-settings-grok-llm.py new file mode 100644 index 000000000..dbf07f21d --- /dev/null +++ b/examples/foundational/55zza-update-settings-grok-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.grok.llm import GrokLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GrokLLMService(api_key=os.getenv("GROK_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Grok LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzb-update-settings-groq-llm.py b/examples/foundational/55zzb-update-settings-groq-llm.py new file mode 100644 index 000000000..8244f611a --- /dev/null +++ b/examples/foundational/55zzb-update-settings-groq-llm.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os 
+ +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.groq.llm import GroqLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GroqLLMService( + api_key=os.getenv("GROQ_API_KEY"), model="meta-llama/llama-4-maverick-17b-128e-instruct" + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Groq LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzc-update-settings-mistral-llm.py b/examples/foundational/55zzc-update-settings-mistral-llm.py new file mode 100644 index 000000000..642eda3c5 --- /dev/null +++ 
b/examples/foundational/55zzc-update-settings-mistral-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.mistral.llm import MistralLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = 
MistralLLMService(api_key=os.getenv("MISTRAL_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Mistral LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55zzd-update-settings-nvidia-llm.py b/examples/foundational/55zzd-update-settings-nvidia-llm.py new file mode 100644 index 000000000..5ffa0ff23 --- /dev/null +++ b/examples/foundational/55zzd-update-settings-nvidia-llm.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.nvidia.llm import NvidiaLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = NvidiaLLMService( + api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct" + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible 
with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zze-update-settings-ollama-llm.py b/examples/foundational/55zze-update-settings-ollama-llm.py new file mode 100644 index 000000000..ca3714943 --- /dev/null +++ b/examples/foundational/55zze-update-settings-ollama-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.ollama.llm import OLLamaLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OLLamaLLMService(model="llama3.2") # Update to the model you're running locally + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OLLama LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = 
PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzf-update-settings-openrouter-llm.py b/examples/foundational/55zzf-update-settings-openrouter-llm.py new file mode 100644 index 000000000..90606a572 --- /dev/null +++ b/examples/foundational/55zzf-update-settings-openrouter-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.openrouter.llm import OpenRouterLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenRouterLLMService(api_key=os.getenv("OPENROUTER_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenRouter LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzg-update-settings-perplexity-llm.py b/examples/foundational/55zzg-update-settings-perplexity-llm.py new file mode 100644 index 000000000..771b1c794 --- /dev/null +++ b/examples/foundational/55zzg-update-settings-perplexity-llm.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.perplexity.llm import PerplexityLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import 
DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY")) + + messages = [ + { + "role": "user", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way. 
Start by introducing yourself.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Perplexity LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzh-update-settings-qwen-llm.py b/examples/foundational/55zzh-update-settings-qwen-llm.py new file mode 100644 index 000000000..81ace2117 --- /dev/null +++ b/examples/foundational/55zzh-update-settings-qwen-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import 
SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.qwen.llm import QwenLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = QwenLLMService(api_key=os.getenv("QWEN_API_KEY"), model="qwen2.5-72b-instruct") + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Qwen LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzi-update-settings-sambanova-llm.py b/examples/foundational/55zzi-update-settings-sambanova-llm.py new file mode 100644 index 000000000..82382a6bd --- /dev/null +++ b/examples/foundational/55zzi-update-settings-sambanova-llm.py 
@@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.sambanova.llm import SambaNovaLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = SambaNovaLLMService(api_key=os.getenv("SAMBANOVA_API_KEY")) + + messages 
= [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating SambaNova LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzj-update-settings-together-llm.py 
b/examples/foundational/55zzj-update-settings-together-llm.py new file mode 100644 index 000000000..1f0a0557f --- /dev/null +++ b/examples/foundational/55zzj-update-settings-together-llm.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.together.llm import TogetherLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + 
api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = TogetherLLMService( + api_key=os.getenv("TOGETHER_API_KEY"), + model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Together LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await 
create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index d01fd4396..c19aa08f8 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -179,6 +179,20 @@ class AsyncAITTSService(AudioContextTTSService): self._keepalive_task = None self._context_id = None + async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + + if not changed: + return changed + + self._warn_unhandled_updated_settings(changed) + + return changed + def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index bcfc583c6..91b5a25c8 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -251,11 +251,6 @@ class FalSTTService(SegmentedSTTService): """ return language_to_fal_language(language) - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update.""" - changed = await super()._update_settings(update) - return changed - @traced_stt async def _handle_transcription( self, transcript: str, is_final: bool, language: Optional[str] = None diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index c41c77436..8c18c9208 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -128,9 +128,8 @@ class GradiumTTSService(InterruptibleWordTTSService): Returns: Dict mapping changed field names to their previous values. 
""" - prev_voice = self._voice_id changed = await super()._update_settings(update) - if self._voice_id != prev_voice: + if "voice" in changed: await self._disconnect() await self._connect() else: From 463ea3725b1f6784941a40497960a475a823f586 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 17:12:24 -0500 Subject: [PATCH 050/189] Update Deepgram Flux with the new service settings pattern --- .../55a-update-settings-deepgram-flux-stt.py | 128 ++++++++++++++++++ src/pipecat/services/deepgram/flux/stt.py | 86 ++++++++---- 2 files changed, 191 insertions(+), 23 deletions(-) create mode 100644 examples/foundational/55a-update-settings-deepgram-flux-stt.py diff --git a/examples/foundational/55a-update-settings-deepgram-flux-stt.py b/examples/foundational/55a-update-settings-deepgram-flux-stt.py new file mode 100644 index 000000000..d5fb66a2e --- /dev/null +++ b/examples/foundational/55a-update-settings-deepgram-flux-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService, DeepgramFluxSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import 
Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Deepgram Flux STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(update=DeepgramFluxSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index dcb5e3429..e82fc4dd8 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -9,6 +9,7 @@ import asyncio import json import time +from dataclasses import dataclass, field from enum import Enum from typing import 
Any, AsyncGenerator, Dict, Optional from urllib.parse import urlencode @@ -27,7 +28,7 @@ from pipecat.frames.frames import ( UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) -from pipecat.services.settings import STTSettings +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 @@ -68,6 +69,34 @@ class FluxEventType(str, Enum): UPDATE = "Update" +@dataclass +class DeepgramFluxSTTSettings(STTSettings): + """Settings for the Deepgram Flux STT service. + + Parameters: + eager_eot_threshold: EagerEndOfTurn/TurnResumed threshold. Off by default. + Lower values = more aggressive (faster response, more LLM calls). + Higher values = more conservative (slower response, fewer LLM calls). + eot_threshold: End-of-turn confidence required to finish a turn (default 0.7). + eot_timeout_ms: Time in ms after speech to finish a turn regardless of EOT + confidence (default 5000). + keyterm: Keyterms to boost recognition accuracy for specialized terminology. + mip_opt_out: Opt out of the Deepgram Model Improvement Program (default False). + tag: Tags to label requests for identification during usage reporting. + min_confidence: Minimum confidence required to create a TranscriptionFrame. + encoding: Audio encoding format (e.g. ``"linear16"``). 
+ """ + + eager_eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + eot_timeout_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + keyterm: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + mip_opt_out: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + tag: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_confidence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramFluxSTTService(WebsocketSTTService): """Deepgram Flux speech-to-text service. @@ -76,6 +105,8 @@ class DeepgramFluxSTTService(WebsocketSTTService): including advanced turn detection and EagerEndOfTurn events for improved conversational AI performance. """ + _settings: DeepgramFluxSTTSettings + class InputParams(BaseModel): """Configuration parameters for Deepgram Flux API. 
@@ -168,14 +199,23 @@ class DeepgramFluxSTTService(WebsocketSTTService): **kwargs, ) + params = params or DeepgramFluxSTTService.InputParams() + self._settings = DeepgramFluxSTTSettings( + model=model, + language=Language.EN, + encoding=flux_encoding, + eager_eot_threshold=params.eager_eot_threshold, + eot_threshold=params.eot_threshold, + eot_timeout_ms=params.eot_timeout_ms, + keyterm=params.keyterm or [], + mip_opt_out=params.mip_opt_out, + tag=params.tag or [], + min_confidence=params.min_confidence, + ) + self.set_model_name(model) self._api_key = api_key self._url = url - self._model = model - self._params = params or DeepgramFluxSTTService.InputParams() self._should_interrupt = should_interrupt - self._flux_encoding = flux_encoding - # This is the currently only supported language - self._language = Language.EN self._websocket_url = None self._receive_task = None # Flux event handlers @@ -330,7 +370,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: + async def _update_settings(self, update: DeepgramFluxSTTSettings) -> dict[str, Any]: """Apply a settings update. Settings are stored but not applied to the active connection. 
@@ -361,29 +401,29 @@ class DeepgramFluxSTTService(WebsocketSTTService): await super().start(frame) url_params = [ - f"model={self._model}", + f"model={self._settings.model}", f"sample_rate={self.sample_rate}", - f"encoding={self._flux_encoding}", + f"encoding={self._settings.encoding}", ] - if self._params.eager_eot_threshold is not None: - url_params.append(f"eager_eot_threshold={self._params.eager_eot_threshold}") + if self._settings.eager_eot_threshold is not None: + url_params.append(f"eager_eot_threshold={self._settings.eager_eot_threshold}") - if self._params.eot_threshold is not None: - url_params.append(f"eot_threshold={self._params.eot_threshold}") + if self._settings.eot_threshold is not None: + url_params.append(f"eot_threshold={self._settings.eot_threshold}") - if self._params.eot_timeout_ms is not None: - url_params.append(f"eot_timeout_ms={self._params.eot_timeout_ms}") + if self._settings.eot_timeout_ms is not None: + url_params.append(f"eot_timeout_ms={self._settings.eot_timeout_ms}") - if self._params.mip_opt_out is not None: - url_params.append(f"mip_opt_out={str(self._params.mip_opt_out).lower()}") + if self._settings.mip_opt_out is not None: + url_params.append(f"mip_opt_out={str(self._settings.mip_opt_out).lower()}") # Add keyterm parameters (can have multiple) - for keyterm in self._params.keyterm: + for keyterm in self._settings.keyterm: url_params.append(urlencode({"keyterm": keyterm})) # Add tag parameters (can have multiple) - for tag_value in self._params.tag: + for tag_value in self._settings.tag: url_params.append(urlencode({"tag": tag_value})) self._websocket_url = f"{self._url}?{'&'.join(url_params)}" @@ -682,7 +722,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): # Compute the average confidence average_confidence = self._calculate_average_confidence(data) - if not self._params.min_confidence or average_confidence > self._params.min_confidence: + if not self._settings.min_confidence or average_confidence > 
self._settings.min_confidence: # EndOfTurn means Flux has determined the turn is complete, # so this TranscriptionFrame is always finalized await self.push_frame( @@ -690,7 +730,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, result=data, finalized=True, ) @@ -700,7 +740,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): f"Transcription confidence below min_confidence threshold: {average_confidence}" ) - await self._handle_transcription(transcript, True, self._language) + await self._handle_transcription(transcript, True, self._settings.language) await self.stop_processing_metrics() await self.broadcast_frame(UserStoppedSpeakingFrame) await self._call_event_handler("on_end_of_turn", transcript) @@ -744,7 +784,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, result=data, ) ) From fb27642190b170dd2e785628dbd86b257d730eba Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 19 Feb 2026 18:35:59 -0500 Subject: [PATCH 051/189] Add `self._settings` to 6 remaining services - AWSNovaSonicLLMService: new `AWSNovaSonicLLMSettings` with `voice_id` and `endpointing_sensitivity`; remove `self._params` entirely, storing audio I/O config as plain instance variables - NeuphonicHttpTTSService: reuse `NeuphonicTTSSettings`; use inherited `language` field instead of bespoke `lang_code` - NvidiaTTSService: new `NvidiaTTSSettings` with `quality` - PiperTTSService / PiperHttpTTSService: new `PiperTTSSettings` / `PiperHttpTTSSettings` (no extra fields) - SpeechmaticsTTSService: new `SpeechmaticsTTSSettings` with `max_retries` Also remove redundant `lang_code` from `NeuphonicTTSSettings` (both WS and HTTP services now use the inherited `TTSSettings.language` field, with automatic enum conversion via the base class). 
HTTP services (Neuphonic HTTP, Piper HTTP, Speechmatics) don't override `_update_settings` since the base class applies changes to `self._settings` and subsequent requests read from it automatically. --- ...55za-update-settings-neuphonic-http-tts.py | 127 +++++++++++++++++ ...5zzk-update-settings-aws-nova-sonic-llm.py | 124 +++++++++++++++++ .../55zzl-update-settings-nvidia-tts.py | 125 +++++++++++++++++ .../55zzm-update-settings-speechmatics-tts.py | 129 ++++++++++++++++++ src/pipecat/services/aws/nova_sonic/llm.py | 89 ++++++++---- src/pipecat/services/neuphonic/tts.py | 33 +++-- src/pipecat/services/nvidia/tts.py | 44 ++++-- src/pipecat/services/piper/tts.py | 40 +++++- src/pipecat/services/speechmatics/tts.py | 31 +++-- 9 files changed, 678 insertions(+), 64 deletions(-) create mode 100644 examples/foundational/55za-update-settings-neuphonic-http-tts.py create mode 100644 examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py create mode 100644 examples/foundational/55zzl-update-settings-nvidia-tts.py create mode 100644 examples/foundational/55zzm-update-settings-speechmatics-tts.py diff --git a/examples/foundational/55za-update-settings-neuphonic-http-tts.py b/examples/foundational/55za-update-settings-neuphonic-http-tts.py new file mode 100644 index 000000000..056b32349 --- /dev/null +++ b/examples/foundational/55za-update-settings-neuphonic-http-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from 
pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService, NeuphonicTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + async with aiohttp.ClientSession() as session: + tts = NeuphonicHttpTTSService( + api_key=os.getenv("NEUPHONIC_API_KEY"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Neuphonic HTTP TTS settings: speed=1.4") + await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py new file mode 100644 index 000000000..1faafdbac --- /dev/null +++ b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License 
+# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService, AWSNovaSonicLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = AWSNovaSonicLLMService( + secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + region=os.getenv("AWS_REGION"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + { + "role": "user", + "content": "Tell me a fun fact!", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AWS Nova Sonic LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(update=AWSNovaSonicLLMSettings(temperature=0.1)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzl-update-settings-nvidia-tts.py b/examples/foundational/55zzl-update-settings-nvidia-tts.py new file mode 100644 index 000000000..b92651496 --- /dev/null +++ b/examples/foundational/55zzl-update-settings-nvidia-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# 
SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.nvidia.tts import NvidiaTTSService, NvidiaTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating NVIDIA TTS settings: language="ES_US"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=NvidiaTTSSettings(language=Language.ES_US)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzm-update-settings-speechmatics-tts.py b/examples/foundational/55zzm-update-settings-speechmatics-tts.py new file mode 100644 index 000000000..36b66fe53 --- /dev/null +++ 
b/examples/foundational/55zzm-update-settings-speechmatics-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.speechmatics.tts import SpeechmaticsTTSService, SpeechmaticsTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + async with aiohttp.ClientSession() as session: + tts = SpeechmaticsTTSService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + aiohttp_session=session, + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Speechmatics TTS settings: voice="theo"') + await task.queue_frame( + TTSUpdateSettingsFrame(update=SpeechmaticsTTSSettings(voice="theo")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 91c2374e3..7d2b9c05e 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -16,7 +16,7 @@ import json import time import uuid import wave -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum from importlib.resources import files from typing import Any, List, Optional @@ -60,7 +60,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService -from pipecat.services.settings import LLMSettings +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.time import time_now_iso8601 try: @@ -186,6 +186,20 @@ class Params(BaseModel): endpointing_sensitivity: Optional[str] = Field(default=None) +@dataclass +class AWSNovaSonicLLMSettings(LLMSettings): + """Settings for AWS Nova Sonic LLM service. + + Parameters: + voice_id: Voice for speech synthesis. + endpointing_sensitivity: Controls how quickly Nova Sonic decides the + user has stopped speaking. Can be "LOW", "MEDIUM", or "HIGH". + """ + + voice_id: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + endpointing_sensitivity: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AWSNovaSonicLLMService(LLMService): """AWS Nova Sonic speech-to-speech LLM service. @@ -193,6 +207,8 @@ class AWSNovaSonicLLMService(LLMService): and function calling capabilities using AWS Nova Sonic model. 
""" + _settings: AWSNovaSonicLLMSettings + # Override the default adapter to use the AWSNovaSonicLLMAdapter one adapter_class = AWSNovaSonicLLMAdapter @@ -243,23 +259,38 @@ class AWSNovaSonicLLMService(LLMService): self._access_key_id = access_key_id self._session_token = session_token self._region = region - self._model = model self._client: Optional[BedrockRuntimeClient] = None - self._voice_id = voice_id - self._params = params or Params() + params = params or Params() + self._settings = AWSNovaSonicLLMSettings( + model=model, + voice_id=voice_id, + temperature=params.temperature, + max_tokens=params.max_tokens, + top_p=params.top_p, + endpointing_sensitivity=params.endpointing_sensitivity, + ) + self.set_model_name(model) + + # Audio I/O config (hardware settings, not runtime-tunable) + self._input_sample_rate = params.input_sample_rate + self._input_sample_size = params.input_sample_size + self._input_channel_count = params.input_channel_count + self._output_sample_rate = params.output_sample_rate + self._output_sample_size = params.output_sample_size + self._output_channel_count = params.output_channel_count self._system_instruction = system_instruction self._tools = tools # Validate endpointing_sensitivity parameter if ( - self._params.endpointing_sensitivity + self._settings.endpointing_sensitivity and not self._is_endpointing_sensitivity_supported() ): logger.warning( f"endpointing_sensitivity is not supported for model '{model}' and will be ignored. " "This parameter is only supported starting with Nova 2 Sonic (amazon.nova-2-sonic-v1:0)." ) - self._params.endpointing_sensitivity = None + self._settings.endpointing_sensitivity = None if not send_transcription_frames: import warnings @@ -307,7 +338,7 @@ class AWSNovaSonicLLMService(LLMService): # settings # - async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: + async def _update_settings(self, update: AWSNovaSonicLLMSettings) -> dict[str, Any]: """Apply a settings update. 
Settings are stored but not applied to the active connection. @@ -320,7 +351,7 @@ class AWSNovaSonicLLMService(LLMService): # TODO: someday we could reconnect here to apply updated settings. # Code might look something like the below: # await self._disconnect() - # await self._connect() + # await self._start_connecting() self._warn_unhandled_updated_settings(changed) @@ -496,7 +527,7 @@ class AWSNovaSonicLLMService(LLMService): # Start the bidirectional stream self._stream = await self._client.invoke_model_with_bidirectional_stream( - InvokeModelWithBidirectionalStreamOperationInput(model_id=self._model) + InvokeModelWithBidirectionalStreamOperationInput(model_id=self._settings.model) ) # Send session start event @@ -663,7 +694,7 @@ class AWSNovaSonicLLMService(LLMService): def _is_first_generation_sonic_model(self) -> bool: # Nova Sonic (the older model) is identified by "amazon.nova-sonic-v1:0" - return self._model == "amazon.nova-sonic-v1:0" + return self._settings.model == "amazon.nova-sonic-v1:0" def _is_endpointing_sensitivity_supported(self) -> bool: # endpointing_sensitivity is only supported with Nova 2 Sonic (and, @@ -682,9 +713,9 @@ class AWSNovaSonicLLMService(LLMService): turn_detection_config = ( f""", "turnDetectionConfiguration": {{ - "endpointingSensitivity": "{self._params.endpointing_sensitivity}" + "endpointingSensitivity": "{self._settings.endpointing_sensitivity}" }}""" - if self._params.endpointing_sensitivity + if self._settings.endpointing_sensitivity else "" ) @@ -693,9 +724,9 @@ class AWSNovaSonicLLMService(LLMService): "event": {{ "sessionStart": {{ "inferenceConfiguration": {{ - "maxTokens": {self._params.max_tokens}, - "topP": {self._params.top_p}, - "temperature": {self._params.temperature} + "maxTokens": {self._settings.max_tokens}, + "topP": {self._settings.top_p}, + "temperature": {self._settings.temperature} }}{turn_detection_config} }} }} @@ -730,10 +761,10 @@ class AWSNovaSonicLLMService(LLMService): }}, 
"audioOutputConfiguration": {{ "mediaType": "audio/lpcm", - "sampleRateHertz": {self._params.output_sample_rate}, - "sampleSizeBits": {self._params.output_sample_size}, - "channelCount": {self._params.output_channel_count}, - "voiceId": "{self._voice_id}", + "sampleRateHertz": {self._output_sample_rate}, + "sampleSizeBits": {self._output_sample_size}, + "channelCount": {self._output_channel_count}, + "voiceId": "{self._settings.voice_id}", "encoding": "base64", "audioType": "SPEECH" }}{tools_config} @@ -758,9 +789,9 @@ class AWSNovaSonicLLMService(LLMService): "role": "USER", "audioInputConfiguration": {{ "mediaType": "audio/lpcm", - "sampleRateHertz": {self._params.input_sample_rate}, - "sampleSizeBits": {self._params.input_sample_size}, - "channelCount": {self._params.input_channel_count}, + "sampleRateHertz": {self._input_sample_rate}, + "sampleSizeBits": {self._input_sample_size}, + "channelCount": {self._input_channel_count}, "audioType": "SPEECH", "encoding": "base64" }} @@ -1043,8 +1074,8 @@ class AWSNovaSonicLLMService(LLMService): audio = base64.b64decode(audio_content) frame = TTSAudioRawFrame( audio=audio, - sample_rate=self._params.output_sample_rate, - num_channels=self._params.output_channel_count, + sample_rate=self._output_sample_rate, + num_channels=self._output_channel_count, ) await self.push_frame(frame) @@ -1328,7 +1359,7 @@ class AWSNovaSonicLLMService(LLMService): """ if not self._is_assistant_response_trigger_needed(): logger.warning( - f"Assistant response trigger not needed for model '{self._model}'; skipping. " + f"Assistant response trigger not needed for model '{self._settings.model}'; skipping. " "An LLMRunFrame() should be sufficient to prompt the assistant to respond, " "assuming the context ends in a user message." 
) @@ -1356,9 +1387,9 @@ class AWSNovaSonicLLMService(LLMService): chunk_duration = 0.02 # what we might get from InputAudioRawFrame chunk_size = int( chunk_duration - * self._params.input_sample_rate - * self._params.input_channel_count - * (self._params.input_sample_size / 8) + * self._input_sample_rate + * self._input_channel_count + * (self._input_sample_size / 8) ) # e.g. 0.02 seconds of 16-bit (2-byte) PCM mono audio at 16kHz is 640 bytes # Lead with a bit of blank audio, if needed. diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 2e51297ab..5f58eb3b4 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -79,13 +79,11 @@ class NeuphonicTTSSettings(TTSSettings): """Settings for Neuphonic TTS service. Parameters: - lang_code: Neuphonic language code. speed: Speech speed multiplier. Defaults to 1.0. encoding: Audio encoding format. sampling_rate: Audio sample rate. """ - lang_code: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) sampling_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -149,7 +147,7 @@ class NeuphonicTTSService(InterruptibleTTSService): self._api_key = api_key self._url = url self._settings = NeuphonicTTSSettings( - lang_code=self.language_to_service_language(params.language), + language=self.language_to_service_language(params.language), speed=params.speed, encoding=encoding, sampling_rate=sample_rate, @@ -286,7 +284,7 @@ class NeuphonicTTSService(InterruptibleTTSService): logger.debug("Connecting to Neuphonic") tts_config = { - "lang_code": self._settings.lang_code, + "lang_code": self._settings.language, "speed": self._settings.speed, "encoding": self._settings.encoding, "sampling_rate": self._settings.sampling_rate, @@ -298,7 +296,7 @@ class NeuphonicTTSService(InterruptibleTTSService): 
if value is not None: query_params.append(f"{key}={value}") - url = f"{self._url}/speak/{self._settings.lang_code}" + url = f"{self._url}/speak/{self._settings.language}" if query_params: url += f"?{'&'.join(query_params)}" @@ -407,6 +405,8 @@ class NeuphonicHttpTTSService(TTSService): HTTP-based communication over WebSocket connections. """ + _settings: NeuphonicTTSSettings + class InputParams(BaseModel): """Input parameters for Neuphonic HTTP TTS configuration. @@ -449,10 +449,13 @@ class NeuphonicHttpTTSService(TTSService): self._api_key = api_key self._session = aiohttp_session self._base_url = url.rstrip("/") - self._lang_code = self.language_to_service_language(params.language) or "en" - self._speed = params.speed - self._encoding = encoding - self._voice_id = voice_id + self._settings = NeuphonicTTSSettings( + voice=voice_id, + language=self.language_to_service_language(params.language) or "en", + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + ) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -536,7 +539,7 @@ class NeuphonicHttpTTSService(TTSService): """ logger.debug(f"Generating TTS: [{text}]") - url = f"{self._base_url}/sse/speak/{self._lang_code}" + url = f"{self._base_url}/sse/speak/{self._settings.language}" headers = { "X-API-KEY": self._api_key, @@ -545,14 +548,14 @@ class NeuphonicHttpTTSService(TTSService): payload = { "text": text, - "lang_code": self._lang_code, - "encoding": self._encoding, + "lang_code": self._settings.language, + "encoding": self._settings.encoding, "sampling_rate": self.sample_rate, - "speed": self._speed, + "speed": self._settings.speed, } - if self._voice_id: - payload["voice_id"] = self._voice_id + if self._settings.voice: + payload["voice_id"] = self._settings.voice try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 27ace15fb..22cffc6c1 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -12,7 +12,8 @@ gRPC API for high-quality speech synthesis. import asyncio import os -from typing import AsyncGenerator, AsyncIterator, Generator, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, AsyncIterator, Generator, Mapping, Optional from pipecat.utils.tracing.service_decorators import traced_tts @@ -30,6 +31,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language @@ -42,6 +44,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class NvidiaTTSSettings(TTSSettings): + """Settings for NVIDIA Riva TTS service. + + Parameters: + quality: Audio quality setting (0-100). + """ + + quality: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class NvidiaTTSService(TTSService): """NVIDIA Riva text-to-speech service. 
@@ -50,6 +63,8 @@ class NvidiaTTSService(TTSService): configurable quality settings. """ + _settings: NvidiaTTSSettings + class InputParams(BaseModel): """Input parameters for Riva TTS configuration. @@ -94,13 +109,14 @@ class NvidiaTTSService(TTSService): self._server = server self._api_key = api_key - self._voice_id = voice_id - self._language_code = params.language - self._quality = params.quality self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl + self._settings = NvidiaTTSSettings( + voice=voice_id, + language=params.language, + quality=params.quality, + ) self.set_model_name(model_function_map.get("model_name")) - self._voice_id = voice_id self._service = None self._config = None @@ -133,6 +149,18 @@ class NvidiaTTSService(TTSService): stacklevel=2, ) + async def _update_settings(self, update: NvidiaTTSSettings) -> dict[str, Any]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + if not changed: + return changed + # TODO: reconnect gRPC client to apply changed settings. 
+ self._warn_unhandled_updated_settings(changed) + return changed + def _initialize_client(self): if self._service is not None: return @@ -181,11 +209,11 @@ class NvidiaTTSService(TTSService): def read_audio_responses() -> Generator[rtts.SynthesizeSpeechResponse, None, None]: responses = self._service.synthesize_online( text, - self._voice_id, - self._language_code, + self._settings.voice, + self._settings.language, sample_rate_hz=self.sample_rate, zero_shot_audio_prompt_file=None, - zero_shot_quality=self._quality, + zero_shot_quality=self._settings.quality, custom_dictionary={}, ) return responses diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index a1a038826..0b43d96d2 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -7,8 +7,9 @@ """Piper TTS service implementation.""" import asyncio +from dataclasses import dataclass from pathlib import Path -from typing import AsyncGenerator, AsyncIterator, Optional +from typing import Any, AsyncGenerator, AsyncIterator, Optional import aiohttp from loguru import logger @@ -19,6 +20,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import TTSSettings from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -31,6 +33,13 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class PiperTTSSettings(TTSSettings): + """Settings for Piper TTS service.""" + + pass + + class PiperTTSService(TTSService): """Piper TTS service implementation. @@ -39,6 +48,8 @@ class PiperTTSService(TTSService): match the configured sample rate. 
""" + _settings: PiperTTSSettings + def __init__( self, *, @@ -60,7 +71,7 @@ class PiperTTSService(TTSService): """ super().__init__(**kwargs) - self._voice_id = voice_id + self._settings = PiperTTSSettings(voice=voice_id) download_dir = download_dir or Path.cwd() @@ -85,6 +96,18 @@ class PiperTTSService(TTSService): """ return True + async def _update_settings(self, update: PiperTTSSettings) -> dict[str, Any]: + """Apply a settings update. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(update) + if not changed: + return changed + # TODO: voice changes would require re-downloading and loading the model. + self._warn_unhandled_updated_settings(changed) + return changed + @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Piper. @@ -143,6 +166,13 @@ class PiperTTSService(TTSService): # $ uv pip install "piper-tts[http]" # $ uv run python -m piper.http_server -m en_US-ryan-high # +@dataclass +class PiperHttpTTSSettings(TTSSettings): + """Settings for Piper HTTP TTS service.""" + + pass + + class PiperHttpTTSService(TTSService): """Piper HTTP TTS service implementation. @@ -151,6 +181,8 @@ class PiperHttpTTSService(TTSService): rates and automatic WAV header removal. """ + _settings: PiperHttpTTSSettings + def __init__( self, *, @@ -175,7 +207,7 @@ class PiperHttpTTSService(TTSService): self._base_url = base_url self._session = aiohttp_session - self._model_id = voice_id + self._settings = PiperHttpTTSSettings(voice=voice_id) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -205,7 +237,7 @@ class PiperHttpTTSService(TTSService): data = { "text": text, - "voice": self._model_id, + "voice": self._settings.voice, } async with self._session.post(self._base_url, json=data, headers=headers) as response: diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 0907b4e26..7c8d9fca5 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -7,7 +7,8 @@ """Speechmatics TTS service integration.""" import asyncio -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from urllib.parse import urlencode import aiohttp @@ -21,6 +22,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.network import exponential_backoff_time from pipecat.utils.tracing.service_decorators import traced_tts @@ -35,6 +37,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class SpeechmaticsTTSSettings(TTSSettings): + """Settings for Speechmatics TTS service. + + Parameters: + max_retries: Maximum number of retries for HTTP requests. + """ + + max_retries: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class SpeechmaticsTTSService(TTSService): """Speechmatics TTS service implementation. @@ -42,6 +55,8 @@ class SpeechmaticsTTSService(TTSService): It converts text to speech and returns raw PCM audio data for real-time playback. 
""" + _settings: SpeechmaticsTTSSettings + SPEECHMATICS_SAMPLE_RATE = 16000 class InputParams(BaseModel): @@ -91,11 +106,11 @@ class SpeechmaticsTTSService(TTSService): if not self._api_key: raise ValueError("Missing Speechmatics API key") - # Default parameters - self._params = params or SpeechmaticsTTSService.InputParams() - - # Set voice from constructor parameter - self._voice_id = voice_id + params = params or SpeechmaticsTTSService.InputParams() + self._settings = SpeechmaticsTTSSettings( + voice=voice_id, + max_retries=params.max_retries, + ) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -131,7 +146,7 @@ class SpeechmaticsTTSService(TTSService): } # Complete HTTP URL - url = _get_endpoint_url(self._base_url, self._voice_id, self.sample_rate) + url = _get_endpoint_url(self._base_url, self._settings.voice, self.sample_rate) try: # Start TTS TTFB metrics @@ -159,7 +174,7 @@ class SpeechmaticsTTSService(TTSService): attempt += 1 # Check if we've exceeded the maximum number of attempts - if attempt >= self._params.max_retries: + if attempt >= self._settings.max_retries: raise ValueError() # Report error frame From 5d8a5bf750a314efb82720e336c6264648c2afb9 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 20 Feb 2026 09:31:22 -0500 Subject: [PATCH 052/189] Add initialization of `self._settings` to service superclasses (`STTService`, `TTSService`, `LLMService`), using "generic" settings for those services (`STTSettings`, `TTSSettings`, `LLMSettings`) --- src/pipecat/services/llm_service.py | 1 + src/pipecat/services/stt_service.py | 1 + src/pipecat/services/tts_service.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 83d60defb..9db7ad27a 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -205,6 +205,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): 
self._tracing_enabled: bool = False self._skip_tts: Optional[bool] = None self._summary_task: Optional[asyncio.Task] = None + self._settings = LLMSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._register_event_handler("on_function_calls_started") self._register_event_handler("on_completion_timeout") diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index fdcefcbd5..47b83ab56 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -105,6 +105,7 @@ class STTService(AIService): self._audio_passthrough = audio_passthrough self._init_sample_rate = sample_rate self._sample_rate = 0 + self._settings = STTSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._tracing_enabled: bool = False self._muted: bool = False self._user_id: str = "" diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 6cd33b3e4..ebf0e602a 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -192,6 +192,7 @@ class TTSService(AIService): self._init_sample_rate = sample_rate self._sample_rate = 0 self._voice_id: str = "" + self._settings = TTSSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: import warnings From f4e9825c03b14648704e296745af40a9e7f54c9f Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 20 Feb 2026 10:48:16 -0500 Subject: [PATCH 053/189] Remove `self._voice_id` from TTS Service implementations in favor of `self._settings.voice` --- .claude/skills/cleanup/SKILL.md | 3 +-- src/pipecat/services/asyncai/tts.py | 9 +++---- src/pipecat/services/aws/tts.py | 6 ++--- src/pipecat/services/azure/tts.py | 7 ++--- src/pipecat/services/camb/tts.py | 5 ++-- 
src/pipecat/services/cartesia/tts.py | 10 +++---- src/pipecat/services/deepgram/tts.py | 9 +++---- src/pipecat/services/elevenlabs/tts.py | 8 +++--- src/pipecat/services/google/tts.py | 26 +++++++++---------- src/pipecat/services/gradium/tts.py | 4 +-- src/pipecat/services/groq/tts.py | 4 +-- src/pipecat/services/hathora/tts.py | 6 ++--- src/pipecat/services/hume/tts.py | 4 +-- src/pipecat/services/inworld/tts.py | 4 +-- src/pipecat/services/kokoro/tts.py | 5 ++-- src/pipecat/services/lmnt/tts.py | 4 +-- src/pipecat/services/minimax/tts.py | 8 +++--- src/pipecat/services/neuphonic/tts.py | 7 ++--- src/pipecat/services/openai/tts.py | 5 ++-- src/pipecat/services/playht/tts.py | 9 +++---- src/pipecat/services/resembleai/tts.py | 6 ++--- src/pipecat/services/rime/tts.py | 10 +++---- src/pipecat/services/sarvam/tts.py | 25 ++++++++---------- src/pipecat/services/tts_service.py | 24 +++++++++-------- src/pipecat/services/xtts/tts.py | 5 ++-- .../utils/tracing/service_decorators.py | 5 ++-- 26 files changed, 103 insertions(+), 115 deletions(-) diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md index c0f4945b7..5e699d588 100644 --- a/.claude/skills/cleanup/SKILL.md +++ b/.claude/skills/cleanup/SKILL.md @@ -291,9 +291,8 @@ class NewTTSService(TTSService): voice: Voice identifier to use. **kwargs: Additional arguments passed to the parent service. 
""" - super().__init__(**kwargs) + super().__init__(voice=voice, **kwargs) self._api_key = api_key - self._voice_id = voice ``` --- diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index c19aa08f8..323fa3906 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -153,6 +153,7 @@ class AsyncAITTSService(AudioContextTTSService): pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -173,7 +174,6 @@ class AsyncAITTSService(AudioContextTTSService): ) self.set_model_name(model) - self._voice_id = voice_id self._receive_task = None self._keepalive_task = None @@ -278,7 +278,7 @@ class AsyncAITTSService(AudioContextTTSService): ) init_msg = { "model_id": self._model_name, - "voice": {"mode": "id", "id": self._voice_id}, + "voice": {"mode": "id", "id": self._settings.voice}, "output_format": { "container": self._settings.output_container, "encoding": self._settings.output_encoding, @@ -497,7 +497,7 @@ class AsyncAIHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or AsyncAIHttpTTSService.InputParams() @@ -514,7 +514,6 @@ class AsyncAIHttpTTSService(TTSService): if params.language else None, ) - self._voice_id = voice_id self.set_model_name(model) self._session = aiohttp_session @@ -561,7 +560,7 @@ class AsyncAIHttpTTSService(TTSService): logger.debug(f"{self}: Generating TTS [{text}]") try: - voice_config = {"mode": "id", "id": self._voice_id} + voice_config = {"mode": "id", "id": self._settings.voice} await self.start_ttfb_metrics() payload = { "model_id": self._model_name, diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index e223a1abc..a277bc0b2 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -195,7 +195,7 @@ class AWSPollyTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or AWSPollyTTSService.InputParams() @@ -222,8 +222,6 @@ class AWSPollyTTSService(TTSService): self._resampler = create_stream_resampler() - self._voice_id = voice_id - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -299,7 +297,7 @@ class AWSPollyTTSService(TTSService): "Text": ssml, "TextType": "ssml", "OutputFormat": "pcm", - "VoiceId": self._voice_id, + "VoiceId": self._settings.voice, "Engine": self._settings.engine, # AWS only supports 8000 and 16000 for PCM. We select 16000. 
"SampleRate": "16000", diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index b69e60b69..2d4c01dc9 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -165,12 +165,12 @@ class AzureBaseTTSService: role=params.role, style=params.style, style_degree=params.style_degree, + voice=voice, volume=params.volume, ) self._api_key = api_key self._region = region - self._voice_id = voice self._speech_synthesizer = None def language_to_service_language(self, language: Language) -> Optional[str]: @@ -194,7 +194,7 @@ class AzureBaseTTSService: f"" - f"" + f"" "" ) @@ -295,6 +295,7 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, + voice=voice, **kwargs, ) @@ -733,7 +734,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): params: Voice and synthesis parameters configuration. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) # Initialize Azure-specific functionality from mixin self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 40dabd17e..ec0853424 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -213,7 +213,7 @@ class CambTTSService(TTSService): params: Additional voice parameters. If None, uses defaults. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) self._api_key = api_key self._timeout = timeout @@ -238,7 +238,6 @@ class CambTTSService(TTSService): ) self.set_model_name(model) - self._voice_id = voice_id self._client = None @@ -299,7 +298,7 @@ class CambTTSService(TTSService): # Build SDK parameters tts_kwargs: Dict[str, Any] = { "text": text, - "voice_id": self._voice_id, + "voice_id": self._settings.voice, "language": self._settings.language, "speech_model": self.model_name, "output_configuration": StreamTtsOutputConfiguration(format="pcm_s16le"), diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 0d8936fdd..a16aa2b39 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -313,6 +313,7 @@ class CartesiaTTSService(AudioContextWordTTSService): pause_frame_processing=True, sample_rate=sample_rate, text_aggregator=text_aggregator, + voice=voice_id, **kwargs, ) @@ -340,9 +341,9 @@ class CartesiaTTSService(AudioContextWordTTSService): emotion=params.emotion, generation_config=params.generation_config, pronunciation_dict_id=params.pronunciation_dict_id, + voice=voice_id, ) self.set_model_name(model) - self._voice_id = voice_id self._context_id = None self._receive_task = None @@ -440,7 +441,7 @@ class CartesiaTTSService(AudioContextWordTTSService): ): voice_config = {} voice_config["mode"] = "id" - voice_config["id"] = self._voice_id + voice_config["id"] = self._settings.voice if is_given(self._settings.emotion) and self._settings.emotion: with warnings.catch_warnings(): @@ -720,7 +721,7 @@ class CartesiaHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or CartesiaHttpTTSService.InputParams() @@ -741,7 +742,6 @@ class CartesiaHttpTTSService(TTSService): generation_config=params.generation_config, pronunciation_dict_id=params.pronunciation_dict_id, ) - self._voice_id = voice_id self.set_model_name(model) self._client = AsyncCartesia( @@ -809,7 +809,7 @@ class CartesiaHttpTTSService(TTSService): logger.debug(f"{self}: Generating TTS [{text}]") try: - voice_config = {"mode": "id", "id": self._voice_id} + voice_config = {"mode": "id", "id": self._settings.voice} if is_given(self._settings.emotion) and self._settings.emotion: with warnings.catch_warnings(): diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 3458a4529..a8b46ce7e 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -101,6 +101,7 @@ class DeepgramTTSService(WebsocketTTSService): pause_frame_processing=True, push_stop_frames=True, append_trailing_space=True, + voice=voice, **kwargs, ) @@ -111,7 +112,6 @@ class DeepgramTTSService(WebsocketTTSService): voice=voice, encoding=encoding, ) - self._voice_id = voice self._receive_task = None self._context_id: Optional[str] = None @@ -210,7 +210,7 @@ class DeepgramTTSService(WebsocketTTSService): # Build WebSocket URL with query parameters params = [] - params.append(f"model={self._voice_id}") + params.append(f"model={self._settings.voice}") params.append(f"encoding={self._settings.encoding}") params.append(f"sample_rate={self.sample_rate}") @@ -388,7 +388,7 @@ class DeepgramHttpTTSService(TTSService): encoding: Audio encoding format. Defaults to "linear16". **kwargs: Additional arguments passed to parent TTSService class. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) self._api_key = api_key self._session = aiohttp_session @@ -398,7 +398,6 @@ class DeepgramHttpTTSService(TTSService): voice=voice, encoding=encoding, ) - self._voice_id = voice def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. @@ -427,7 +426,7 @@ class DeepgramHttpTTSService(TTSService): headers = {"Authorization": f"Token {self._api_key}", "Content-Type": "application/json"} params = { - "model": self._voice_id, + "model": self._settings.voice, "encoding": self._settings.encoding, "sample_rate": self.sample_rate, "container": "none", diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 9503866a7..73e9027d3 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -400,6 +400,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -424,7 +425,6 @@ class ElevenLabsTTSService(AudioContextWordTTSService): apply_text_normalization=params.apply_text_normalization, ) self.set_model_name(model) - self._voice_id = voice_id self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() @@ -607,7 +607,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): logger.debug("Connecting to ElevenLabs") - voice_id = self._voice_id + voice_id = self._settings.voice model = self.model_name output_format = self._output_format url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings.auto_mode}" @@ -906,6 +906,7 @@ class ElevenLabsHttpTTSService(WordTTSService): push_text_frames=False, push_stop_frames=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -931,7 +932,6 @@ class 
ElevenLabsHttpTTSService(WordTTSService): apply_text_normalization=params.apply_text_normalization, ) self.set_model_name(model) - self._voice_id = voice_id self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -1098,7 +1098,7 @@ class ElevenLabsHttpTTSService(WordTTSService): logger.debug(f"{self}: Generating TTS [{text}]") # Use the with-timestamps endpoint - url = f"{self._base_url}/v1/text-to-speech/{self._voice_id}/stream/with-timestamps" + url = f"{self._base_url}/v1/text-to-speech/{self._settings.voice}/stream/with-timestamps" payload: Dict[str, Union[str, Dict[str, Union[float, bool]]]] = { "text": text, diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 60bed9c6d..33fa1b2a8 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -602,7 +602,7 @@ class GoogleHttpTTSService(TTSService): params: Voice customization parameters including pitch, rate, volume, etc. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or GoogleHttpTTSService.InputParams() @@ -618,8 +618,8 @@ class GoogleHttpTTSService(TTSService): else "en-US", gender=params.gender, google_style=params.google_style, + voice=voice_id, ) - self._voice_id = voice_id self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -707,7 +707,7 @@ class GoogleHttpTTSService(TTSService): ssml = "" # Voice tag - voice_attrs = [f"name='{self._voice_id}'"] + voice_attrs = [f"name='{self._settings.voice}'"] language = self._settings.language voice_attrs.append(f"language='{language}'") @@ -766,8 +766,8 @@ class GoogleHttpTTSService(TTSService): await self.start_ttfb_metrics() # Check if the voice is a Chirp voice (including Chirp 3) or Journey voice - is_chirp_voice = "chirp" in self._voice_id.lower() - is_journey_voice = "journey" in self._voice_id.lower() + is_chirp_voice = "chirp" in self._settings.voice.lower() + is_journey_voice = "journey" in self._settings.voice.lower() # Create synthesis input based on voice_id if is_chirp_voice or is_journey_voice: @@ -778,7 +778,7 @@ class GoogleHttpTTSService(TTSService): synthesis_input = texttospeech_v1.SynthesisInput(ssml=ssml) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings.language, name=self._voice_id + language_code=self._settings.language, name=self._settings.voice ) # Build audio config with conditional speaking_rate audio_config_params = { @@ -1015,7 +1015,7 @@ class GoogleTTSService(GoogleBaseTTSService): params: Language configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or GoogleTTSService.InputParams() @@ -1025,8 +1025,8 @@ class GoogleTTSService(GoogleBaseTTSService): if params.language else "en-US", speaking_rate=params.speaking_rate, + voice=voice_id, ) - self._voice_id = voice_id self._voice_cloning_key = voice_cloning_key self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path @@ -1073,7 +1073,7 @@ class GoogleTTSService(GoogleBaseTTSService): ) else: voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings.language, name=self._voice_id + language_code=self._settings.language, name=self._settings.voice ) # Create streaming config @@ -1220,7 +1220,7 @@ class GeminiTTSService(GoogleBaseTTSService): f"Google TTS only supports {self.GOOGLE_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or GeminiTTSService.InputParams() @@ -1229,7 +1229,6 @@ class GeminiTTSService(GoogleBaseTTSService): self._location = location self._model = model - self._voice_id = voice_id self._settings = GeminiTTSSettings( language=self.language_to_service_language(params.language) if params.language @@ -1237,6 +1236,7 @@ class GeminiTTSService(GoogleBaseTTSService): prompt=params.prompt, multi_speaker=params.multi_speaker, speaker_configs=params.speaker_configs, + voice=voice_id, ) self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( @@ -1306,7 +1306,7 @@ class GeminiTTSService(GoogleBaseTTSService): speaker_voice_configs.append( texttospeech_v1.MultispeakerPrebuiltVoice( speaker_alias=speaker_config["speaker_alias"], - speaker_id=speaker_config.get("speaker_id", self._voice_id), + speaker_id=speaker_config.get("speaker_id", self._settings.voice), 
) ) @@ -1323,7 +1323,7 @@ class GeminiTTSService(GoogleBaseTTSService): # Single speaker mode voice = texttospeech_v1.VoiceSelectionParams( language_code=self._settings.language, - name=self._voice_id, + name=self._settings.voice, model_name=self._model, ) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 8c18c9208..e3d855c5c 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -91,6 +91,7 @@ class GradiumTTSService(InterruptibleWordTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=SAMPLE_RATE, + voice=voice_id, **kwargs, ) @@ -99,7 +100,6 @@ class GradiumTTSService(InterruptibleWordTTSService): # Store service configuration self._api_key = api_key self._url = url - self._voice_id = voice_id self._json_config = json_config self._settings = GradiumTTSSettings( model=model, @@ -208,7 +208,7 @@ class GradiumTTSService(InterruptibleWordTTSService): setup_msg = { "type": "setup", "output_format": "pcm", - "voice_id": self._voice_id, + "voice_id": self._settings.voice, } if self._json_config is not None: setup_msg["json_config"] = self._json_config diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index b3b4c5f57..78d744461 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -102,6 +102,7 @@ class GroqTTSService(TTSService): super().__init__( pause_frame_processing=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -110,7 +111,6 @@ class GroqTTSService(TTSService): self._api_key = api_key self._model_name = model_name self._output_format = output_format - self._voice_id = voice_id self._params = params self._settings = GroqTTSSettings( @@ -151,7 +151,7 @@ class GroqTTSService(TTSService): try: response = await self._client.audio.speech.create( model=self._model_name, - voice=self._voice_id, + voice=self._settings.voice, response_format=self._output_format, input=text, ) diff --git 
a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index e15dfcc54..6e75feeca 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -109,6 +109,7 @@ class HathoraTTSService(TTSService): """ super().__init__( sample_rate=sample_rate, + voice=voice_id, **kwargs, ) self._model = model @@ -125,7 +126,6 @@ class HathoraTTSService(TTSService): ) self.set_model_name(model) - self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -154,8 +154,8 @@ class HathoraTTSService(TTSService): payload = {"model": self._model, "text": text} - if self._voice_id is not None: - payload["voice"] = self._voice_id + if self._settings.voice is not None: + payload["voice"] = self._settings.voice if self._settings.speed is not None: payload["speed"] = self._settings.speed if self._settings.config is not None: diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 27c4b417e..a52922787 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -126,6 +126,7 @@ class HumeTTSService(WordTTSService): sample_rate=sample_rate, push_text_frames=False, push_stop_frames=True, + voice=voice_id, **kwargs, ) @@ -142,7 +143,6 @@ class HumeTTSService(WordTTSService): speed=params.speed, trailing_silence=params.trailing_silence, ) - self._voice_id = voice_id self._audio_bytes = b"" @@ -263,7 +263,7 @@ class HumeTTSService(WordTTSService): # Build the request payload utterance_kwargs: dict[str, Any] = { "text": text, - "voice": PostedUtteranceVoiceWithId(id=self._voice_id), + "voice": PostedUtteranceVoiceWithId(id=self._settings.voice), } if self._settings.description is not None: utterance_kwargs["description"] = self._settings.description diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index bdbbb82d7..34dc34933 100644 --- a/src/pipecat/services/inworld/tts.py +++ 
b/src/pipecat/services/inworld/tts.py @@ -144,6 +144,7 @@ class InworldHttpTTSService(WordTTSService): push_text_frames=False, push_stop_frames=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -173,7 +174,6 @@ class InworldHttpTTSService(WordTTSService): self._cumulative_time = 0.0 - self._voice_id = voice_id self.set_model_name(model) def can_generate_metrics(self) -> bool: @@ -519,6 +519,7 @@ class InworldTTSService(AudioContextWordTTSService): sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, append_trailing_space=append_trailing_space, + voice=voice_id, **kwargs, ) @@ -563,7 +564,6 @@ class InworldTTSService(AudioContextWordTTSService): # Track the end time of the last word in the current generation self._generation_end_time = 0.0 - self._voice_id = voice_id self.set_model_name(model) def can_generate_metrics(self) -> bool: diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 735145da7..9f2aac368 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -137,11 +137,10 @@ class KokoroTTSService(TTSService): **kwargs: Additional arguments passed to parent `TTSService`. 
""" - super().__init__(**kwargs) + super().__init__(voice=voice_id, **kwargs) params = params or KokoroTTSService.InputParams() - self._voice_id = voice_id self._lang_code = language_to_kokoro_language(params.language) self._settings = KokoroTTSSettings( @@ -182,7 +181,7 @@ class KokoroTTSService(TTSService): yield TTSStartedFrame(context_id=context_id) stream = self._kokoro.create_stream( - text, voice=self._voice_id, lang=self._lang_code, speed=1.0 + text, voice=self._settings.voice, lang=self._lang_code, speed=1.0 ) async for samples, sample_rate in stream: diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 94f4a1a9e..ab56f2296 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -118,11 +118,11 @@ class LmntTTSService(InterruptibleTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) self._api_key = api_key - self._voice_id = voice_id self.set_model_name(model) self._settings = LmntTTSSettings( model=model, @@ -235,7 +235,7 @@ class LmntTTSService(InterruptibleTTSService): # Build initial connection message init_msg = { "X-API-Key": self._api_key, - "voice": self._voice_id, + "voice": self._settings.voice, "format": self._settings.format, "sample_rate": self.sample_rate, "language": self._settings.language, diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 290439704..ca6cfd7bc 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -227,7 +227,7 @@ class MiniMaxHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or MiniMaxHttpTTSService.InputParams() @@ -236,7 +236,6 @@ class MiniMaxHttpTTSService(TTSService): self._base_url = f"{base_url}?GroupId={group_id}" self._session = aiohttp_session self._model_name = model - self._voice_id = voice_id # Create voice settings self._settings = MiniMaxTTSSettings( @@ -251,8 +250,7 @@ class MiniMaxHttpTTSService(TTSService): audio_channel=1, ) - # Set voice and model - self._voice_id = voice_id + # Set model self.set_model_name(model) # Add language boost if provided @@ -359,7 +357,7 @@ class MiniMaxHttpTTSService(TTSService): # Build voice_setting dict for API voice_setting = { - "voice_id": self._voice_id, + "voice_id": self._settings.voice, "speed": self._settings.speed, "vol": self._settings.volume, "pitch": self._settings.pitch, diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 5f58eb3b4..ffcbdcd8c 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -139,6 +139,7 @@ class NeuphonicTTSService(InterruptibleTTSService): push_stop_frames=True, stop_frame_timeout_s=2.0, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -151,8 +152,8 @@ class NeuphonicTTSService(InterruptibleTTSService): speed=params.speed, encoding=encoding, sampling_rate=sample_rate, + voice=voice_id, ) - self._voice_id = voice_id self._cumulative_time = 0 @@ -288,7 +289,7 @@ class NeuphonicTTSService(InterruptibleTTSService): "speed": self._settings.speed, "encoding": self._settings.encoding, "sampling_rate": self._settings.sampling_rate, - "voice_id": self._voice_id, + "voice_id": self._settings.voice, } query_params = [] @@ -442,7 +443,7 @@ class NeuphonicHttpTTSService(TTSService): params: Additional input parameters for TTS configuration. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or NeuphonicHttpTTSService.InputParams() diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index 2253e369a..764688125 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -132,10 +132,9 @@ class OpenAITTSService(TTSService): f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) self.set_model_name(model) - self._voice_id = voice self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) if instructions or speed: @@ -196,7 +195,7 @@ class OpenAITTSService(TTSService): create_params = { "input": text, "model": self.model_name, - "voice": VALID_VOICES[self._voice_id], + "voice": VALID_VOICES[self._settings.voice], "response_format": "pcm", } diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index b5c683fbe..0242eaac1 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -173,6 +173,7 @@ class PlayHTTTSService(InterruptibleTTSService): super().__init__( pause_frame_processing=True, sample_rate=sample_rate, + voice=voice_url, **kwargs, ) @@ -205,7 +206,6 @@ class PlayHTTTSService(InterruptibleTTSService): seed=params.seed, ) self.set_model_name(voice_engine) - self._voice_id = voice_url def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -425,7 +425,7 @@ class PlayHTTTSService(InterruptibleTTSService): tts_command = { "text": text, - "voice": self._voice_id, + "voice": self._settings.voice, "voice_engine": self._settings.voice_engine, "output_format": self._settings.output_format, "sample_rate": self.sample_rate, @@ -511,7 +511,7 @@ class PlayHTHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_url, **kwargs) # Warn about deprecated protocol parameter if explicitly provided if protocol: @@ -556,7 +556,6 @@ class PlayHTHttpTTSService(TTSService): seed=params.seed, ) self.set_model_name(voice_engine) - self._voice_id = voice_url async def start(self, frame: StartFrame): """Start the PlayHT HTTP TTS service. @@ -605,7 +604,7 @@ class PlayHTHttpTTSService(TTSService): # Prepare the request payload payload = { "text": text, - "voice": self._voice_id, + "voice": self._settings.voice, "voice_engine": self._settings.voice_engine, "output_format": self._settings.output_format, "sample_rate": self.sample_rate, diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index f2873a8a1..79fdf54a9 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -94,11 +94,11 @@ class ResembleAITTSService(AudioContextWordTTSService): """ super().__init__( sample_rate=sample_rate, + voice=voice_id, **kwargs, ) self._api_key = api_key - self._voice_id = voice_id self._url = url self._settings = ResembleAITTSSettings( voice=voice_id, @@ -126,8 +126,6 @@ class ResembleAITTSService(AudioContextWordTTSService): self._jitter_buffer_bytes = 44100 # ~1000ms at 22050Hz to handle 400ms+ network gaps self._playback_started: dict[str, bool] = {} # Track if we've started playback per request - self._voice_id = voice_id - def 
can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -146,7 +144,7 @@ class ResembleAITTSService(AudioContextWordTTSService): JSON string containing the request payload. """ msg = { - "voice_uuid": self._voice_id, + "voice_uuid": self._settings.voice, "data": text, "binary_response": False, # Use JSON frames to get timestamps "request_id": self._request_id_counter, # ResembleAI only accepts number diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 87596cefd..99250bce0 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -191,6 +191,7 @@ class RimeTTSService(AudioContextWordTTSService): pause_frame_processing=True, append_trailing_space=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) @@ -207,7 +208,6 @@ class RimeTTSService(AudioContextWordTTSService): # Store service configuration self._api_key = api_key self._url = url - self._voice_id = voice_id self._model = model self._settings = RimeTTSSettings( voice=voice_id, @@ -582,7 +582,7 @@ class RimeHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) params = params or RimeHttpTTSService.InputParams() @@ -596,8 +596,8 @@ class RimeHttpTTSService(TTSService): pauseBetweenBrackets=params.pause_between_brackets, phonemizeBetweenBrackets=params.phonemize_between_brackets, inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else NOT_GIVEN, + voice=voice_id, ) - self._voice_id = voice_id self.set_model_name(model) def can_generate_metrics(self) -> bool: @@ -648,7 +648,7 @@ class RimeHttpTTSService(TTSService): if is_given(self._settings.inlineSpeedAlpha): payload["inlineSpeedAlpha"] = self._settings.inlineSpeedAlpha payload["text"] = text - payload["speaker"] = self._voice_id + payload["speaker"] = self._settings.voice payload["modelId"] = self._model_name payload["samplingRate"] = self.sample_rate @@ -762,12 +762,12 @@ class RimeNonJsonTTSService(InterruptibleTTSService): aggregate_sentences=aggregate_sentences, push_stop_frames=True, pause_frame_processing=True, + voice=voice_id, **kwargs, ) params = params or RimeNonJsonTTSService.InputParams() self._api_key = api_key self._url = url - self._voice_id = voice_id self._model = model self._settings = RimeNonJsonTTSSettings( voice=voice_id, diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 332643fc9..ba93c7c26 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -281,7 +281,6 @@ class SarvamTTSSettings(TTSSettings): Parameters: target_language_code: Sarvam language code. - speaker: Voice speaker ID. speech_sample_rate: Audio sample rate as string. enable_preprocessing: Enable text preprocessing. Defaults to False. **Note:** Always enabled for bulbul:v3-beta. 
@@ -306,7 +305,6 @@ class SarvamTTSSettings(TTSSettings): """ target_language_code: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - speaker: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) speech_sample_rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -460,14 +458,14 @@ class SarvamHttpTTSService(TTSService): if sample_rate is None: sample_rate = self._config.default_sample_rate - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or SarvamHttpTTSService.InputParams() # Set default voice based on model if not specified if voice_id is None: voice_id = self._config.default_speaker + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + self._api_key = api_key self._base_url = base_url self._session = aiohttp_session @@ -489,6 +487,7 @@ class SarvamHttpTTSService(TTSService): ), pace=pace, model=model, + voice=voice_id, ) # Add parameters based on model support @@ -508,7 +507,6 @@ class SarvamHttpTTSService(TTSService): logger.warning(f"temperature parameter is ignored for {model}") self.set_model_name(model) - self._voice_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -558,7 +556,7 @@ class SarvamHttpTTSService(TTSService): payload = { "text": text, "target_language_code": self._settings.language, - "speaker": self._voice_id, + "speaker": self._settings.voice, "sample_rate": self.sample_rate, "enable_preprocessing": self._settings.enable_preprocessing, "model": self._model_name, @@ -812,6 +810,10 @@ class SarvamTTSService(InterruptibleTTSService): if sample_rate is None: sample_rate = self._config.default_sample_rate + # Set default voice based on model if not specified + if voice_id is None: + voice_id = self._config.default_speaker + # Initialize parent class first super().__init__( aggregate_sentences=aggregate_sentences, @@ -819,19 +821,15 @@ class SarvamTTSService(InterruptibleTTSService): pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, + voice=voice_id, **kwargs, ) params = params or SarvamTTSService.InputParams() - # Set default voice based on model if not specified - if voice_id is None: - voice_id = self._config.default_speaker - # WebSocket endpoint URL with model query parameter self._websocket_url = f"{url}?model={model}" self._api_key = api_key self.set_model_name(model) - self._voice_id = voice_id # Validate and clamp pace to model's valid range pace = params.pace @@ -845,7 +843,6 @@ class SarvamTTSService(InterruptibleTTSService): target_language_code=( self.language_to_service_language(params.language) if params.language else "en-IN" ), - speaker=voice_id, speech_sample_rate=str(sample_rate), enable_preprocessing=( True if self._config.preprocessing_always_enabled else params.enable_preprocessing @@ -856,6 +853,7 @@ class SarvamTTSService(InterruptibleTTSService): output_audio_bitrate=params.output_audio_bitrate, pace=pace, model=model, + voice=voice_id, ) # Add parameters based on model support @@ -1018,11 +1016,10 @@ class SarvamTTSService(InterruptibleTTSService): """Send initial configuration message.""" if not self._websocket: raise Exception("WebSocket not connected") - 
self._settings.speaker = self._voice_id # Build config dict for the API config_data = { "target_language_code": self._settings.target_language_code, - "speaker": self._settings.speaker, + "speaker": self._settings.voice, "speech_sample_rate": self._settings.speech_sample_rate, "enable_preprocessing": self._settings.enable_preprocessing, "min_buffer_size": self._settings.min_buffer_size, diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index ebf0e602a..4567e2db3 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -145,6 +145,11 @@ class TTSService(AIService): text_filter: Optional[BaseTextFilter] = None, # Audio transport destination of the generated frames. transport_destination: Optional[str] = None, + # Voice identifier or name to use for speech synthesis + voice: Optional[str] = None, + # Language to use for speech synthesis. This will be translated to a + # service-specific language identifier before being applied + language: Optional[Language] = None, **kwargs, ): """Initialize the TTS service. @@ -178,6 +183,10 @@ class TTSService(AIService): Use `text_filters` instead, which allows multiple filters. transport_destination: Destination for generated audio frames. + voice: Voice identifier or name to use for speech synthesis. + language: Language to use for speech synthesis. This will be + translated to a service-specific language identifier before + being applied. **kwargs: Additional arguments passed to the parent AIService. 
""" super().__init__(**kwargs) @@ -191,8 +200,9 @@ class TTSService(AIService): self._append_trailing_space: bool = append_trailing_space self._init_sample_rate = sample_rate self._sample_rate = 0 - self._voice_id: str = "" - self._settings = TTSSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) + self._settings = TTSSettings( + voice=voice, language=language + ) # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: import warnings @@ -427,11 +437,7 @@ class TTSService(AIService): async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: """Apply a TTS settings update. - Handles ``model`` (via parent) and syncs ``_voice_id`` when voice - changes. Translates language values before applying. Does **not** - call ``set_voice`` or ``set_model`` directly — concrete services - should override this method and handle reconnect logic based on the - returned changed-field dict. + Translates language to service-specific value before applying. Args: update: A TTS settings delta. @@ -447,10 +453,6 @@ class TTSService(AIService): changed = await super()._update_settings(update) - # Keep _voice_id in sync for code that reads it directly - if "voice" in changed: - self._voice_id = self._settings.voice - return changed async def say(self, text: str): diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 65aa25e36..5cc91709f 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -111,14 +111,13 @@ class XTTSService(TTSService): sample_rate: Audio sample rate. If None, uses default. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) self._settings = XTTSTTSSettings( voice=voice_id, language=self.language_to_service_language(language), base_url=base_url, ) - self._voice_id = voice_id self._studio_speakers: Optional[Dict[str, Any]] = None self._aiohttp_session = aiohttp_session @@ -180,7 +179,7 @@ class XTTSService(TTSService): logger.error(f"{self} no studio speakers available") return - embeddings = self._studio_speakers[self._voice_id] + embeddings = self._studio_speakers[self._settings.voice] url = self._settings.base_url + "/tts_stream" diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 42babd5cd..3b23f337a 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -190,13 +190,14 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - tracer = trace.get_tracer("pipecat") with tracer.start_as_current_span(span_name, context=parent_context) as span: try: + settings = getattr(self, "_settings", {}) add_tts_span_attributes( span=span, service_name=service_class_name, model=getattr(self, "model_name") or "unknown", - voice_id=getattr(self, "_voice_id", "unknown"), + voice_id=getattr(settings, "voice", "unknown"), text=text, - settings=getattr(self, "_settings", {}), + settings=settings, character_count=len(text), operation_name="tts", cartesia_version=getattr(self, "_cartesia_version", None), From f5b86d9cdc616013c28f314ed4541dee2a34c0e3 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 20 Feb 2026 11:26:28 -0500 Subject: [PATCH 054/189] Actually, revert the change making it so that `STTService` takes `model` and `language` args at init time. It'll be up to the subclasses to append those to `_settings` (or better yet, provide their own service-specific `_settings`). This avoids rocking the boat too too much. 
--- .claude/skills/cleanup/SKILL.md | 2 +- src/pipecat/services/asyncai/tts.py | 3 +-- src/pipecat/services/aws/tts.py | 2 +- src/pipecat/services/azure/tts.py | 3 +-- src/pipecat/services/camb/tts.py | 2 +- src/pipecat/services/cartesia/tts.py | 3 +-- src/pipecat/services/deepgram/tts.py | 3 +-- src/pipecat/services/elevenlabs/tts.py | 2 -- src/pipecat/services/google/tts.py | 6 +++--- src/pipecat/services/gradium/tts.py | 1 - src/pipecat/services/groq/tts.py | 1 - src/pipecat/services/hathora/tts.py | 1 - src/pipecat/services/hume/tts.py | 1 - src/pipecat/services/inworld/tts.py | 2 -- src/pipecat/services/kokoro/tts.py | 2 +- src/pipecat/services/lmnt/tts.py | 1 - src/pipecat/services/minimax/tts.py | 2 +- src/pipecat/services/neuphonic/tts.py | 3 +-- src/pipecat/services/openai/tts.py | 2 +- src/pipecat/services/playht/tts.py | 3 +-- src/pipecat/services/resembleai/tts.py | 1 - src/pipecat/services/rime/tts.py | 4 +--- src/pipecat/services/sarvam/tts.py | 3 +-- src/pipecat/services/tts_service.py | 11 +---------- src/pipecat/services/xtts/tts.py | 2 +- 25 files changed, 19 insertions(+), 47 deletions(-) diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md index 5e699d588..48c5e0ee8 100644 --- a/.claude/skills/cleanup/SKILL.md +++ b/.claude/skills/cleanup/SKILL.md @@ -291,7 +291,7 @@ class NewTTSService(TTSService): voice: Voice identifier to use. **kwargs: Additional arguments passed to the parent service. 
""" - super().__init__(voice=voice, **kwargs) + super().__init__(**kwargs) self._api_key = api_key ``` diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 323fa3906..31ce83810 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -153,7 +153,6 @@ class AsyncAITTSService(AudioContextTTSService): pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) @@ -497,7 +496,7 @@ class AsyncAIHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or AsyncAIHttpTTSService.InputParams() diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index a277bc0b2..4e071465d 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -195,7 +195,7 @@ class AWSPollyTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or AWSPollyTTSService.InputParams() diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 2d4c01dc9..f1bf9d400 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -295,7 +295,6 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, - voice=voice, **kwargs, ) @@ -734,7 +733,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): params: Voice and synthesis parameters configuration. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) # Initialize Azure-specific functionality from mixin self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index ec0853424..411b4cabf 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -213,7 +213,7 @@ class CambTTSService(TTSService): params: Additional voice parameters. If None, uses defaults. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) self._api_key = api_key self._timeout = timeout diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index a16aa2b39..daddfa6c8 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -313,7 +313,6 @@ class CartesiaTTSService(AudioContextWordTTSService): pause_frame_processing=True, sample_rate=sample_rate, text_aggregator=text_aggregator, - voice=voice_id, **kwargs, ) @@ -721,7 +720,7 @@ class CartesiaHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or CartesiaHttpTTSService.InputParams() diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index a8b46ce7e..23bfecb03 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -101,7 +101,6 @@ class DeepgramTTSService(WebsocketTTSService): pause_frame_processing=True, push_stop_frames=True, append_trailing_space=True, - voice=voice, **kwargs, ) @@ -388,7 +387,7 @@ class DeepgramHttpTTSService(TTSService): encoding: Audio encoding format. Defaults to "linear16". **kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) self._api_key = api_key self._session = aiohttp_session diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 73e9027d3..7de346826 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -400,7 +400,6 @@ class ElevenLabsTTSService(AudioContextWordTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) @@ -906,7 +905,6 @@ class ElevenLabsHttpTTSService(WordTTSService): push_text_frames=False, push_stop_frames=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 33fa1b2a8..1103c69e4 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -602,7 +602,7 @@ class GoogleHttpTTSService(TTSService): params: Voice customization parameters including pitch, rate, volume, etc. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or GoogleHttpTTSService.InputParams() @@ -1015,7 +1015,7 @@ class GoogleTTSService(GoogleBaseTTSService): params: Language configuration parameters. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or GoogleTTSService.InputParams() @@ -1220,7 +1220,7 @@ class GeminiTTSService(GoogleBaseTTSService): f"Google TTS only supports {self.GOOGLE_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or GeminiTTSService.InputParams() diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index e3d855c5c..85e77f057 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -91,7 +91,6 @@ class GradiumTTSService(InterruptibleWordTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=SAMPLE_RATE, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 78d744461..5c3eab636 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -102,7 +102,6 @@ class GroqTTSService(TTSService): super().__init__( pause_frame_processing=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 6e75feeca..9778a2f14 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -109,7 +109,6 @@ class HathoraTTSService(TTSService): """ super().__init__( sample_rate=sample_rate, - voice=voice_id, **kwargs, ) self._model = model diff 
--git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index a52922787..d15f13ce1 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -126,7 +126,6 @@ class HumeTTSService(WordTTSService): sample_rate=sample_rate, push_text_frames=False, push_stop_frames=True, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 34dc34933..1b6dee46f 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -144,7 +144,6 @@ class InworldHttpTTSService(WordTTSService): push_text_frames=False, push_stop_frames=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) @@ -519,7 +518,6 @@ class InworldTTSService(AudioContextWordTTSService): sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, append_trailing_space=append_trailing_space, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 9f2aac368..6e848ae87 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -137,7 +137,7 @@ class KokoroTTSService(TTSService): **kwargs: Additional arguments passed to parent `TTSService`. 
""" - super().__init__(voice=voice_id, **kwargs) + super().__init__(**kwargs) params = params or KokoroTTSService.InputParams() diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index ab56f2296..71ffb4f5f 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -118,7 +118,6 @@ class LmntTTSService(InterruptibleTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index ca6cfd7bc..507febf2d 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -227,7 +227,7 @@ class MiniMaxHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or MiniMaxHttpTTSService.InputParams() diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index ffcbdcd8c..dd2360e4c 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -139,7 +139,6 @@ class NeuphonicTTSService(InterruptibleTTSService): push_stop_frames=True, stop_frame_timeout_s=2.0, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) @@ -443,7 +442,7 @@ class NeuphonicHttpTTSService(TTSService): params: Additional input parameters for TTS configuration. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or NeuphonicHttpTTSService.InputParams() diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index 764688125..853647f7f 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -132,7 +132,7 @@ class OpenAITTSService(TTSService): f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, voice=voice, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) self.set_model_name(model) self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index 0242eaac1..630351164 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -173,7 +173,6 @@ class PlayHTTTSService(InterruptibleTTSService): super().__init__( pause_frame_processing=True, sample_rate=sample_rate, - voice=voice_url, **kwargs, ) @@ -511,7 +510,7 @@ class PlayHTHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice_url, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) # Warn about deprecated protocol parameter if explicitly provided if protocol: diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 79fdf54a9..8e436923b 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -94,7 +94,6 @@ class ResembleAITTSService(AudioContextWordTTSService): """ super().__init__( sample_rate=sample_rate, - voice=voice_id, **kwargs, ) diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 99250bce0..2f006efc7 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -191,7 +191,6 @@ class RimeTTSService(AudioContextWordTTSService): pause_frame_processing=True, append_trailing_space=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) @@ -582,7 +581,7 @@ class RimeHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) params = params or RimeHttpTTSService.InputParams() @@ -762,7 +761,6 @@ class RimeNonJsonTTSService(InterruptibleTTSService): aggregate_sentences=aggregate_sentences, push_stop_frames=True, pause_frame_processing=True, - voice=voice_id, **kwargs, ) params = params or RimeNonJsonTTSService.InputParams() diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index ba93c7c26..cbccd0130 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -464,7 +464,7 @@ class SarvamHttpTTSService(TTSService): if voice_id is None: voice_id = self._config.default_speaker - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) self._api_key = api_key self._base_url = base_url @@ -821,7 +821,6 @@ class SarvamTTSService(InterruptibleTTSService): pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, - voice=voice_id, **kwargs, ) params = params or SarvamTTSService.InputParams() diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 4567e2db3..e5602d469 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -145,11 +145,6 @@ class TTSService(AIService): text_filter: Optional[BaseTextFilter] = None, # Audio transport destination of the generated frames. transport_destination: Optional[str] = None, - # Voice identifier or name to use for speech synthesis - voice: Optional[str] = None, - # Language to use for speech synthesis. This will be translated to a - # service-specific language identifier before being applied - language: Optional[Language] = None, **kwargs, ): """Initialize the TTS service. @@ -183,10 +178,6 @@ class TTSService(AIService): Use `text_filters` instead, which allows multiple filters. 
transport_destination: Destination for generated audio frames. - voice: Voice identifier or name to use for speech synthesis. - language: Language to use for speech synthesis. This will be - translated to a service-specific language identifier before - being applied. **kwargs: Additional arguments passed to the parent AIService. """ super().__init__(**kwargs) @@ -201,7 +192,7 @@ class TTSService(AIService): self._init_sample_rate = sample_rate self._sample_rate = 0 self._settings = TTSSettings( - voice=voice, language=language + voice="" ) # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index 5cc91709f..ba2eb4fc2 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -111,7 +111,7 @@ class XTTSService(TTSService): sample_rate: Audio sample rate. If None, uses default. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, voice=voice_id, **kwargs) + super().__init__(sample_rate=sample_rate, **kwargs) self._settings = XTTSTTSSettings( voice=voice_id, From 29e2a861dc35ede003854fbc2c0f3bac5794b2f2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 20 Feb 2026 11:42:24 -0500 Subject: [PATCH 055/189] Update `AIService.set_model_name` to `AIService._sync_model_name_to_metrics` to: - indicate clearly that it's not meant for public use - make it clear that `self._settings` is the single source of truth for model information - set the stage for an upcoming change where `AIService` subclasses won't ever have to worry about explicitly calling an `AIService` method to sync model name to metrics Across all services, switch from accessing `self._model_name` or `self.model_name` to accessing `self._settings.model`.
--- src/pipecat/services/ai_service.py | 26 ++++++------- src/pipecat/services/anthropic/llm.py | 6 +-- src/pipecat/services/asyncai/tts.py | 7 ++-- src/pipecat/services/aws/llm.py | 6 +-- src/pipecat/services/aws/nova_sonic/llm.py | 2 +- src/pipecat/services/azure/image.py | 3 +- src/pipecat/services/camb/tts.py | 9 ++--- src/pipecat/services/cartesia/stt.py | 2 +- src/pipecat/services/cartesia/tts.py | 11 +++--- src/pipecat/services/cerebras/llm.py | 2 +- src/pipecat/services/deepgram/flux/stt.py | 2 +- src/pipecat/services/deepgram/stt.py | 4 +- .../services/deepgram/stt_sagemaker.py | 4 +- src/pipecat/services/deepseek/llm.py | 2 +- src/pipecat/services/elevenlabs/stt.py | 4 +- src/pipecat/services/elevenlabs/tts.py | 10 ++--- src/pipecat/services/fal/image.py | 5 ++- src/pipecat/services/fireworks/llm.py | 2 +- src/pipecat/services/fish/tts.py | 6 +-- src/pipecat/services/gladia/stt.py | 4 +- .../services/google/gemini_live/llm.py | 6 ++- src/pipecat/services/google/image.py | 4 +- src/pipecat/services/google/llm.py | 10 ++--- src/pipecat/services/groq/stt.py | 2 +- src/pipecat/services/groq/tts.py | 4 +- src/pipecat/services/hathora/stt.py | 3 +- src/pipecat/services/hathora/tts.py | 3 +- src/pipecat/services/inworld/tts.py | 4 +- src/pipecat/services/lmnt/tts.py | 4 +- src/pipecat/services/minimax/tts.py | 15 +------- src/pipecat/services/mistral/llm.py | 2 +- src/pipecat/services/moondream/vision.py | 3 +- src/pipecat/services/nvidia/stt.py | 13 +++---- src/pipecat/services/nvidia/tts.py | 5 ++- src/pipecat/services/openai/base_llm.py | 4 +- src/pipecat/services/openai/image.py | 5 ++- src/pipecat/services/openai/realtime/llm.py | 2 +- src/pipecat/services/openai/stt.py | 18 ++++----- src/pipecat/services/openai/tts.py | 4 +- .../services/openai_realtime_beta/openai.py | 2 +- src/pipecat/services/openrouter/llm.py | 3 +- src/pipecat/services/perplexity/llm.py | 2 +- src/pipecat/services/playht/tts.py | 5 ++- src/pipecat/services/rime/tts.py | 23 +++++------ 
src/pipecat/services/sambanova/llm.py | 2 +- src/pipecat/services/sambanova/stt.py | 2 +- src/pipecat/services/sarvam/stt.py | 18 +++++---- src/pipecat/services/sarvam/tts.py | 7 ++-- src/pipecat/services/soniox/stt.py | 6 +-- src/pipecat/services/speechmatics/stt.py | 2 +- src/pipecat/services/stt_service.py | 2 +- src/pipecat/services/whisper/base_stt.py | 2 +- src/pipecat/services/whisper/stt.py | 8 ++-- .../utils/tracing/service_decorators.py | 38 +++++++++++-------- 54 files changed, 173 insertions(+), 177 deletions(-) diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index ec78549c2..61ed7d456 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -42,27 +42,25 @@ class AIService(FrameProcessor): **kwargs: Additional arguments passed to the parent FrameProcessor. """ super().__init__(**kwargs) - self._model_name: str = "" - self._settings: ServiceSettings = ServiceSettings() + self._settings: ServiceSettings = ServiceSettings(model="") self._session_properties: Dict[str, Any] = {} - @property - def model_name(self) -> str: - """Get the current model name. + def _sync_model_name_to_metrics(self): + """Sync the current AI model name (in `self._settings.model`) for usage in metrics. - Returns: - The name of the AI model being used. - """ - return self._model_name + We don't store model name here because there's already a single source + of truth for it in `self._settings.model`. This method is just for + syncing the model name to the metrics data. - def set_model_name(self, model: str): - """Set the AI model name and update metrics. + TODO: as a next step we should make it so that service classes pass + model into `super().__init__` and `AIService` can be responsible for + syncing its initial value to metrics, just as it's responsible for + syncing any updates to its value to metrics via `_update_settings`. Args: model: The name of the AI model to use. 
""" - self._model_name = model - self.set_core_metrics_data(MetricsData(processor=self.name, model=self._model_name)) + self.set_core_metrics_data(MetricsData(processor=self.name, model=self._settings.model)) async def start(self, frame: StartFrame): """Start the AI service. @@ -117,7 +115,7 @@ class AIService(FrameProcessor): changed = self._settings.apply_update(update) if "model" in changed: - self.set_model_name(self._settings.model) + self._sync_model_name_to_metrics() if changed: logger.info(f"{self.name}: updated settings fields: {set(changed)}") diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 68ebf7ab1..c7f27c0d1 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -237,7 +237,6 @@ class AnthropicLLMService(LLMService): self._client = client or AsyncAnthropic( api_key=api_key ) # if the client is provided, use it and remove it, otherwise create a new one - self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout self._settings = AnthropicLLMSettings( @@ -258,6 +257,7 @@ class AnthropicLLMService(LLMService): thinking=params.thinking, extra=params.extra if isinstance(params.extra, dict) else {}, ) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate usage metrics. 
@@ -324,7 +324,7 @@ class AnthropicLLMService(LLMService): # Build params using the same method as streaming completions params = { - "model": self.model_name, + "model": self._settings.model, "max_tokens": max_tokens if max_tokens is not None else self._settings.max_tokens, "stream": False, "temperature": self._settings.temperature, @@ -438,7 +438,7 @@ class AnthropicLLMService(LLMService): await self.start_ttfb_metrics() params = { - "model": self.model_name, + "model": self._settings.model, "max_tokens": self._settings.max_tokens, "stream": True, "temperature": self._settings.temperature, diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 31ce83810..36b4ab4de 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -171,8 +171,7 @@ class AsyncAITTSService(AudioContextTTSService): if params.language else None, ) - - self.set_model_name(model) + self._sync_model_name_to_metrics() self._receive_task = None self._keepalive_task = None @@ -513,7 +512,7 @@ class AsyncAIHttpTTSService(TTSService): if params.language else None, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._session = aiohttp_session @@ -562,7 +561,7 @@ class AsyncAIHttpTTSService(TTSService): voice_config = {"mode": "id", "id": self._settings.voice} await self.start_ttfb_metrics() payload = { - "model_id": self._model_name, + "model_id": self._settings.model, "transcript": text, "voice": voice_config, "output_format": { diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index b39d518ec..de994463b 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -820,7 +820,6 @@ class AWSBedrockLLMService(LLMService): "config": client_config, } - self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout self._settings = AWSBedrockLLMSettings( @@ -833,6 +832,7 @@ class 
AWSBedrockLLMService(LLMService): if isinstance(params.additional_model_request_fields, dict) else {}, ) + self._sync_model_name_to_metrics() logger.info(f"Using AWS Bedrock model: {model}") @@ -895,7 +895,7 @@ class AWSBedrockLLMService(LLMService): inference_config["maxTokens"] = max_tokens request_params = { - "modelId": self.model_name, + "modelId": self._settings.model, "messages": messages, "additionalModelRequestFields": self._settings.additional_model_request_fields, } @@ -1052,7 +1052,7 @@ class AWSBedrockLLMService(LLMService): # Prepare request parameters request_params = { - "modelId": self.model_name, + "modelId": self._settings.model, "messages": messages, "additionalModelRequestFields": self._settings.additional_model_request_fields, } diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 7d2b9c05e..eba5cc21b 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -269,7 +269,7 @@ class AWSNovaSonicLLMService(LLMService): top_p=params.top_p, endpointing_sensitivity=params.endpointing_sensitivity, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() # Audio I/O config (hardware settings, not runtime-tunable) self._input_sample_rate = params.input_sample_rate diff --git a/src/pipecat/services/azure/image.py b/src/pipecat/services/azure/image.py index 2bddf6c43..f5ce4a9f1 100644 --- a/src/pipecat/services/azure/image.py +++ b/src/pipecat/services/azure/image.py @@ -54,7 +54,8 @@ class AzureImageGenServiceREST(ImageGenService): self._api_key = api_key self._azure_endpoint = endpoint self._api_version = api_version - self.set_model_name(model) + self._settings.model = model + self._sync_model_name_to_metrics() self._image_size = image_size self._aiohttp_session = aiohttp_session diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index 411b4cabf..a2887df28 100644 --- a/src/pipecat/services/camb/tts.py +++ 
b/src/pipecat/services/camb/tts.py @@ -236,8 +236,7 @@ class CambTTSService(TTSService): ), user_instructions=params.user_instructions, ) - - self.set_model_name(model) + self._sync_model_name_to_metrics() self._client = None @@ -272,7 +271,7 @@ class CambTTSService(TTSService): # Use model-specific sample rate if not explicitly specified if not self._init_sample_rate: - self._sample_rate = MODEL_SAMPLE_RATES.get(self.model_name, 22050) + self._sample_rate = MODEL_SAMPLE_RATES.get(self._settings.model, 22050) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -300,12 +299,12 @@ class CambTTSService(TTSService): "text": text, "voice_id": self._settings.voice, "language": self._settings.language, - "speech_model": self.model_name, + "speech_model": self._settings.model, "output_configuration": StreamTtsOutputConfiguration(format="pcm_s16le"), } # Add user instructions if using mars-instruct model - if self._model_name == "mars-instruct" and self._settings.user_instructions: + if self._settings.model == "mars-instruct" and self._settings.user_instructions: tts_kwargs["user_instructions"] = self._settings.user_instructions await self.start_tts_usage_metrics(text) diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index e3270936b..6a30f9a53 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -201,7 +201,7 @@ class CartesiaSTTService(WebsocketSTTService): language=merged_options.get("language"), encoding=merged_options.get("encoding", "pcm_s16le"), ) - self.set_model_name(merged_options["model"]) + self._sync_model_name_to_metrics() self._api_key = api_key self._base_url = base_url or "api.cartesia.ai" self._receive_task = None diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index daddfa6c8..1185a2305 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -330,6 
+330,7 @@ class CartesiaTTSService(AudioContextWordTTSService): self._cartesia_version = cartesia_version self._url = url self._settings = CartesiaTTSSettings( + model=model, output_container=container, output_encoding=encoding, output_sample_rate=0, @@ -342,7 +343,7 @@ class CartesiaTTSService(AudioContextWordTTSService): pronunciation_dict_id=params.pronunciation_dict_id, voice=voice_id, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._context_id = None self._receive_task = None @@ -457,7 +458,7 @@ class CartesiaTTSService(AudioContextWordTTSService): "transcript": text, "continue": continue_transcript, "context_id": self._context_id, - "model_id": self.model_name, + "model_id": self._settings.model, "voice": voice_config, "output_format": { "container": self._settings.output_container, @@ -465,7 +466,7 @@ class CartesiaTTSService(AudioContextWordTTSService): "sample_rate": self._settings.output_sample_rate, }, "add_timestamps": add_timestamps, - "use_original_timestamps": False if self.model_name == "sonic" else True, + "use_original_timestamps": False if self._settings.model == "sonic" else True, } if is_given(self._settings.language) and self._settings.language: @@ -741,7 +742,7 @@ class CartesiaHttpTTSService(TTSService): generation_config=params.generation_config, pronunciation_dict_id=params.pronunciation_dict_id, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._client = AsyncCartesia( api_key=api_key, @@ -829,7 +830,7 @@ class CartesiaHttpTTSService(TTSService): } payload = { - "model_id": self._model_name, + "model_id": self._settings.model, "transcript": text, "voice": voice_config, "output_format": output_format, diff --git a/src/pipecat/services/cerebras/llm.py b/src/pipecat/services/cerebras/llm.py index 01a8165f8..e1ecceef7 100644 --- a/src/pipecat/services/cerebras/llm.py +++ b/src/pipecat/services/cerebras/llm.py @@ -66,7 +66,7 @@ class CerebrasLLMService(OpenAILLMService): Dictionary of parameters 
for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "seed": self._settings.seed, "temperature": self._settings.temperature, diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index e82fc4dd8..3b6ede086 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -212,7 +212,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): tag=params.tag or [], min_confidence=params.min_confidence, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._api_key = api_key self._url = url self._should_interrupt = should_interrupt diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 8d4a72fc3..6e5955450 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -143,13 +143,13 @@ class DeepgramSTTService(STTService): if "language" in merged_options and isinstance(merged_options["language"], Language): merged_options["language"] = merged_options["language"].value - self.set_model_name(merged_options["model"]) merged_live_options = LiveOptions(**merged_options) self._settings = DeepgramSTTSettings( model=merged_options.get("model"), language=merged_options.get("language"), live_options=merged_live_options, ) + self._sync_model_name_to_metrics() self._addons = addons self._should_interrupt = should_interrupt @@ -225,7 +225,7 @@ class DeepgramSTTService(STTService): elif "live_options" in changed and self._settings.live_options.model is not None: # Only live_options was given → pull model up. 
self._settings.model = self._settings.live_options.model - self.set_model_name(self._settings.model) + self._sync_model_name_to_metrics() # --- Sync language ----------------------------------------------- if language_given: diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 3184bf7f8..71fdf4417 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -143,13 +143,13 @@ class DeepgramSageMakerSTTService(STTService): if "language" in merged_options and isinstance(merged_options["language"], Language): merged_options["language"] = merged_options["language"].value - self.set_model_name(merged_options["model"]) merged_live_options = LiveOptions(**merged_options) self._settings = DeepgramSageMakerSTTSettings( model=merged_options.get("model"), language=merged_options.get("language"), live_options=merged_live_options, ) + self._sync_model_name_to_metrics() self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None @@ -193,7 +193,7 @@ class DeepgramSageMakerSTTService(STTService): elif "live_options" in changed and self._settings.live_options.model is not None: # Only live_options was given → pull model up. self._settings.model = self._settings.live_options.model - self.set_model_name(self._settings.model) + self._sync_model_name_to_metrics() # --- Sync language ----------------------------------------------- if language_given: diff --git a/src/pipecat/services/deepseek/llm.py b/src/pipecat/services/deepseek/llm.py index 806dce13d..70318c9ba 100644 --- a/src/pipecat/services/deepseek/llm.py +++ b/src/pipecat/services/deepseek/llm.py @@ -65,7 +65,7 @@ class DeepSeekLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. 
""" params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, "frequency_penalty": self._settings.frequency_penalty, diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index c3e6b29e7..5ff91f597 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -281,7 +281,7 @@ class ElevenLabsSTTService(SegmentedSTTService): else "eng", tag_audio_events=params.tag_audio_events, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -531,7 +531,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): enable_logging=params.enable_logging, include_language_detection=params.include_language_detection, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. 
diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 7de346826..55875cb69 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -423,7 +423,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): enable_logging=params.enable_logging, apply_text_normalization=params.apply_text_normalization, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() @@ -607,7 +607,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): logger.debug("Connecting to ElevenLabs") voice_id = self._settings.voice - model = self.model_name + model = self._settings.model output_format = self._output_format url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings.auto_mode}" @@ -929,7 +929,7 @@ class ElevenLabsHttpTTSService(WordTTSService): speed=params.speed, apply_text_normalization=params.apply_text_normalization, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -1100,7 +1100,7 @@ class ElevenLabsHttpTTSService(WordTTSService): payload: Dict[str, Union[str, Dict[str, Union[float, bool]]]] = { "text": text, - "model_id": self._model_name, + "model_id": self._settings.model, } # Include previous text as context if available @@ -1122,7 +1122,7 @@ class ElevenLabsHttpTTSService(WordTTSService): payload["apply_text_normalization"] = self._settings.apply_text_normalization language = self._settings.language - if self._model_name in ELEVENLABS_MULTILINGUAL_MODELS and language: + if self._settings.model in ELEVENLABS_MULTILINGUAL_MODELS and language: payload["language_code"] = language 
logger.debug(f"Using language code: {language}") elif language: diff --git a/src/pipecat/services/fal/image.py b/src/pipecat/services/fal/image.py index 412cedfbd..fd9d9a22d 100644 --- a/src/pipecat/services/fal/image.py +++ b/src/pipecat/services/fal/image.py @@ -78,7 +78,8 @@ class FalImageGenService(ImageGenService): **kwargs: Additional arguments passed to parent ImageGenService. """ super().__init__(**kwargs) - self.set_model_name(model) + self._settings.model = model + self._sync_model_name_to_metrics() self._params = params self._aiohttp_session = aiohttp_session if key: @@ -103,7 +104,7 @@ class FalImageGenService(ImageGenService): logger.debug(f"Generating image from prompt: {prompt}") response = await fal_client.run_async( - self.model_name, + self._settings.model, arguments={"prompt": prompt, **self._params.model_dump(exclude_none=True)}, ) diff --git a/src/pipecat/services/fireworks/llm.py b/src/pipecat/services/fireworks/llm.py index 9338d8c5a..92deb00b9 100644 --- a/src/pipecat/services/fireworks/llm.py +++ b/src/pipecat/services/fireworks/llm.py @@ -66,7 +66,7 @@ class FireworksLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. 
""" params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "frequency_penalty": self._settings.frequency_penalty, "presence_penalty": self._settings.presence_penalty, diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 131495769..0d84ea7ab 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -176,6 +176,7 @@ class FishAudioTTSService(InterruptibleTTSService): self._request_id = None self._settings = FishAudioTTSSettings( + model=model_id, voice=reference_id, fish_sample_rate=0, latency=params.latency, @@ -185,8 +186,7 @@ class FishAudioTTSService(InterruptibleTTSService): prosody_volume=params.prosody_volume, reference_id=reference_id, ) - - self.set_model_name(model_id) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -267,7 +267,7 @@ class FishAudioTTSService(InterruptibleTTSService): logger.debug("Connecting to Fish Audio") headers = {"Authorization": f"Bearer {self._api_key}"} - headers["model"] = self.model_name + headers["model"] = self._settings.model self._websocket = await websocket_connect(self._base_url, additional_headers=headers) # Send initial start message with ormsgpack diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index c92d9469f..8d968e00f 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -279,9 +279,9 @@ class GladiaSTTService(WebsocketSTTService): self._api_key = api_key self._region = region self._url = url - self.set_model_name(model) self._receive_task = None self._settings = GladiaSTTSettings(model=model, input_params=params) + self._sync_model_name_to_metrics() # Session management self._session_url = None @@ -328,7 +328,7 @@ class GladiaSTTService(WebsocketSTTService): "bit_depth": params.bit_depth or 16, "sample_rate": self.sample_rate, "channels": params.channels or 1, 
- "model": self._model_name, + "model": self._settings.model, } # Add custom_metadata if provided diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 00b540385..84b06a86d 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -701,7 +701,6 @@ class GeminiLiveLLMService(LLMService): self._last_sent_time = 0 self._base_url = base_url - self.set_model_name(model) self._voice_id = voice_id self._language_code = params.language @@ -763,6 +762,7 @@ class GeminiLiveLLMService(LLMService): proactivity=params.proactivity or {}, extra=params.extra if isinstance(params.extra, dict) else {}, ) + self._sync_model_name_to_metrics() self._file_api_base_url = file_api_base_url self._file_api: Optional[GeminiFileAPI] = None @@ -1230,7 +1230,9 @@ class GeminiLiveLLMService(LLMService): await self.push_error(error_msg=f"Initialization error: {e}", exception=e) async def _connection_task_handler(self, config: LiveConnectConfig): - async with self._client.aio.live.connect(model=self._model_name, config=config) as session: + async with self._client.aio.live.connect( + model=self._settings.model, config=config + ) as session: logger.info("Connected to Gemini service") # Mark connection start time diff --git a/src/pipecat/services/google/image.py b/src/pipecat/services/google/image.py index fcc8e41d0..f03b1da63 100644 --- a/src/pipecat/services/google/image.py +++ b/src/pipecat/services/google/image.py @@ -79,7 +79,9 @@ class GoogleImageGenService(ImageGenService): http_options = update_google_client_http_options(http_options) self._client = genai.Client(api_key=api_key, http_options=http_options) - self.set_model_name(self._params.model) + + self._settings.model = self._params.model + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index f5a6db78c..2004aa15a 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -797,7 +797,6 @@ class GoogleLLMService(LLMService): params = params or GoogleLLMService.InputParams() - self.set_model_name(model) self._api_key = api_key self._system_instruction = system_instruction self._http_options = update_google_client_http_options(http_options) @@ -811,6 +810,7 @@ class GoogleLLMService(LLMService): thinking=params.thinking, extra=params.extra if isinstance(params.extra, dict) else {}, ) + self._sync_model_name_to_metrics() self._tools = tools self._tool_config = tool_config @@ -870,7 +870,7 @@ class GoogleLLMService(LLMService): # Use the new google-genai client's async method response = await self._client.aio.models.generate_content( - model=self._model_name, + model=self._settings.model, contents=messages, config=generation_config, ) @@ -930,10 +930,10 @@ class GoogleLLMService(LLMService): # There's no way to introspect on model capabilities, so # to check for models that we know default to thinkin on # and can be configured to turn it off. - if not self._model_name.startswith("gemini-2.5-flash"): + if not self._settings.model.startswith("gemini-2.5-flash"): return # If we have an image model, we don't use a budget either. - if "image" in self._model_name: + if "image" in self._settings.model: return # If thinking_config is already set, don't override it. 
if "thinking_config" in generation_params: @@ -974,7 +974,7 @@ class GoogleLLMService(LLMService): await self.start_ttfb_metrics() return await self._client.aio.models.generate_content_stream( - model=self._model_name, + model=self._settings.model, contents=messages, config=generation_config, ) diff --git a/src/pipecat/services/groq/stt.py b/src/pipecat/services/groq/stt.py index 52cb0a7cc..d51e93c68 100644 --- a/src/pipecat/services/groq/stt.py +++ b/src/pipecat/services/groq/stt.py @@ -62,7 +62,7 @@ class GroqSTTService(BaseWhisperSTTService): # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, + "model": self._settings.model, # Use verbose_json to get probability metrics "response_format": "verbose_json" if self._include_prob_metrics else "json", "language": self._language, diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 5c3eab636..cc073f8c7 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -108,7 +108,6 @@ class GroqTTSService(TTSService): params = params or GroqTTSService.InputParams() self._api_key = api_key - self._model_name = model_name self._output_format = output_format self._params = params @@ -120,6 +119,7 @@ class GroqTTSService(TTSService): speed=params.speed, groq_sample_rate=sample_rate, ) + self._sync_model_name_to_metrics() self._client = AsyncGroq(api_key=self._api_key) @@ -149,7 +149,7 @@ class GroqTTSService(TTSService): try: response = await self._client.audio.speech.create( - model=self._model_name, + model=self._settings.model, voice=self._settings.voice, response_format=self._output_format, input=text, diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index e77e382c0..a08a80aa2 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -105,8 +105,7 @@ class HathoraSTTService(SegmentedSTTService): 
language=params.language, config=params.config, ) - - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 9778a2f14..1e7662aab 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -123,8 +123,7 @@ class HathoraTTSService(TTSService): speed=params.speed, config=params.config, ) - - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 1b6dee46f..1d8aa7e4b 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -173,7 +173,7 @@ class InworldHttpTTSService(WordTTSService): self._cumulative_time = 0.0 - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -562,7 +562,7 @@ class InworldTTSService(AudioContextWordTTSService): # Track the end time of the last word in the current generation self._generation_end_time = 0.0 - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 71ffb4f5f..f70bfa402 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -122,13 +122,13 @@ class LmntTTSService(InterruptibleTTSService): ) self._api_key = api_key - self.set_model_name(model) self._settings = LmntTTSSettings( model=model, voice=voice_id, language=self.language_to_service_language(language), format="raw", ) + self._sync_model_name_to_metrics() self._receive_task = None self._context_id: Optional[str] = None @@ -238,7 +238,7 @@ class LmntTTSService(InterruptibleTTSService): "format": self._settings.format, "sample_rate": self.sample_rate, "language": self._settings.language, - "model": self.model_name, + "model": self._settings.model, } # Connect to LMNT's websocket directly diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 507febf2d..54925f7e4 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -235,7 +235,6 @@ class MiniMaxHttpTTSService(TTSService): self._group_id = group_id self._base_url = f"{base_url}?GroupId={group_id}" self._session = aiohttp_session - self._model_name = model # Create voice settings self._settings = MiniMaxTTSSettings( @@ -249,9 +248,7 @@ class MiniMaxHttpTTSService(TTSService): audio_format="pcm", audio_channel=1, ) - - # Set model - self.set_model_name(model) + self._sync_model_name_to_metrics() # Add language boost if provided if params.language: @@ -318,14 +315,6 @@ class MiniMaxHttpTTSService(TTSService): """ return language_to_minimax_language(language) - def set_model_name(self, model: str): - """Set the TTS model to use. - - Args: - model: The model name to use for synthesis. - """ - self._model_name = model - async def start(self, frame: StartFrame): """Start the MiniMax TTS service. 
@@ -382,7 +371,7 @@ class MiniMaxHttpTTSService(TTSService): "stream": self._settings.stream, "voice_setting": voice_setting, "audio_setting": audio_setting, - "model": self._model_name, + "model": self._settings.model, "text": text, } if is_given(self._settings.language_boost): diff --git a/src/pipecat/services/mistral/llm.py b/src/pipecat/services/mistral/llm.py index 7a8f5b71a..984ffb7dd 100644 --- a/src/pipecat/services/mistral/llm.py +++ b/src/pipecat/services/mistral/llm.py @@ -180,7 +180,7 @@ class MistralLLMService(OpenAILLMService): fixed_messages = self._apply_mistral_fixups(params_from_context["messages"]) params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "messages": fixed_messages, "tools": params_from_context["tools"], diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py index 6a180b4cb..16be15ac5 100644 --- a/src/pipecat/services/moondream/vision.py +++ b/src/pipecat/services/moondream/vision.py @@ -81,7 +81,8 @@ class MoondreamService(VisionService): """ super().__init__(**kwargs) - self.set_model_name(model) + self._settings.model = model + self._sync_model_name_to_metrics() if not use_cpu: device, dtype = detect_device() diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index a79119c34..fd924204e 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -181,10 +181,10 @@ class NvidiaSTTService(STTService): self._function_id = model_function_map.get("function_id") self._settings = NvidiaSTTSettings( + model=model_function_map.get("model_name"), language=params.language, ) - - self.set_model_name(model_function_map.get("model_name")) + self._sync_model_name_to_metrics() self._asr_service = None self._queue = None @@ -282,7 +282,7 @@ class NvidiaSTTService(STTService): if not self._thread_task: self._thread_task = self.create_task(self._thread_task_handler()) - logger.debug(f"Initialized 
NvidiaSTTService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaSTTService with model: {self._settings.model}") async def stop(self, frame: EndFrame): """Stop the NVIDIA Riva STT service and clean up resources. @@ -467,9 +467,6 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): params = params or NvidiaSegmentedSTTService.InputParams() - # Set model name - self.set_model_name(model_function_map.get("model_name")) - # Initialize NVIDIA Riva settings self._api_key = api_key self._server = server @@ -488,6 +485,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = None self._asr_service = None self._settings = NvidiaSegmentedSTTSettings( + model=model_function_map.get("model_name"), language=self.language_to_service_language(params.language or Language.EN_US) or "en-US", profanity_filter=params.profanity_filter, @@ -496,6 +494,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): boosted_lm_words=params.boosted_lm_words, boosted_lm_score=params.boosted_lm_score, ) + self._sync_model_name_to_metrics() def language_to_service_language(self, language: Language) -> Optional[str]: """Convert pipecat Language enum to NVIDIA Riva's language code. @@ -578,7 +577,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): await super().start(frame) self._initialize_client() self._config = self._create_recognition_config() - logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self._settings.model}") async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update and sync internal state. 
diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 22cffc6c1..ade5da63d 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -112,11 +112,12 @@ class NvidiaTTSService(TTSService): self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl self._settings = NvidiaTTSSettings( + model=model_function_map.get("model_name"), voice=voice_id, language=params.language, quality=params.quality, ) - self.set_model_name(model_function_map.get("model_name")) + self._sync_model_name_to_metrics() self._service = None self._config = None @@ -192,7 +193,7 @@ class NvidiaTTSService(TTSService): await super().start(frame) self._initialize_client() self._config = self._create_synthesis_config() - logger.debug(f"Initialized NvidiaTTSService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaTTSService with model: {self._settings.model}") @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 5b624010f..aad88472a 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -151,7 +151,7 @@ class BaseOpenAILLMService(LLMService): ) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self.set_model_name(model) + self._sync_model_name_to_metrics() self._full_model_name: str = "" self._client = self.create_client( api_key=api_key, @@ -265,7 +265,7 @@ class BaseOpenAILLMService(LLMService): Dictionary of parameters for the chat completion request. 
""" params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, "frequency_penalty": self._settings.frequency_penalty, diff --git a/src/pipecat/services/openai/image.py b/src/pipecat/services/openai/image.py index d6ca51ae7..36efc5987 100644 --- a/src/pipecat/services/openai/image.py +++ b/src/pipecat/services/openai/image.py @@ -53,7 +53,8 @@ class OpenAIImageGenService(ImageGenService): model: DALL-E model to use for generation. Defaults to "dall-e-3". """ super().__init__() - self.set_model_name(model) + self._settings.model = model + self._sync_model_name_to_metrics() self._image_size = image_size self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) self._aiohttp_session = aiohttp_session @@ -70,7 +71,7 @@ class OpenAIImageGenService(ImageGenService): logger.debug(f"Generating image from prompt: {prompt}") image = await self._client.images.generate( - prompt=prompt, model=self.model_name, n=1, size=self._image_size + prompt=prompt, model=self._settings.model, n=1, size=self._image_size ) image_url = image.data[0].url diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 3560f0c27..d765fea75 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -175,12 +175,12 @@ class OpenAIRealtimeLLMService(LLMService): self.api_key = api_key self.base_url = full_url - self.set_model_name(model) self._settings = OpenAIRealtimeLLMSettings( model=model, session_properties=session_properties or events.SessionProperties(), ) + self._sync_model_name_to_metrics() self._audio_input_paused = start_audio_paused self._video_input_paused = start_video_paused self._video_frame_detail = video_frame_detail diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 82ad8c0f0..13a37a2b1 100644 --- a/src/pipecat/services/openai/stt.py +++ 
b/src/pipecat/services/openai/stt.py @@ -100,24 +100,24 @@ class OpenAISTTService(BaseWhisperSTTService): # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, - "language": self._language, + "model": self._settings.model, + "language": self._settings.language, } if self._include_prob_metrics: # GPT-4o-transcribe models only support logprobs (not verbose_json) - if self.model_name in ("gpt-4o-transcribe", "gpt-4o-mini-transcribe"): + if self._settings.model in ("gpt-4o-transcribe", "gpt-4o-mini-transcribe"): kwargs["response_format"] = "json" kwargs["include"] = ["logprobs"] else: # Whisper models support verbose_json kwargs["response_format"] = "verbose_json" - if self._prompt is not None: - kwargs["prompt"] = self._prompt + if self._settings.prompt is not None: + kwargs["prompt"] = self._settings.prompt - if self._temperature is not None: - kwargs["temperature"] = self._temperature + if self._settings.temperature is not None: + kwargs["temperature"] = self._settings.temperature return await self._client.audio.transcriptions.create(**kwargs) @@ -226,7 +226,6 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self._api_key = api_key self._base_url = base_url - self.set_model_name(model) self._prompt = prompt self._turn_detection = turn_detection @@ -238,6 +237,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): language=language, prompt=prompt, ) + self._sync_model_name_to_metrics() self._receive_task = None self._session_ready = False @@ -437,7 +437,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): async def _send_session_update(self): """Send ``session.update`` to configure the transcription session.""" - transcription: dict = {"model": self.model_name} + transcription: dict = {"model": self._settings.model} language_code = ( self._language_to_code(self._settings.language) if self._settings.language else None diff --git a/src/pipecat/services/openai/tts.py 
b/src/pipecat/services/openai/tts.py index 853647f7f..2693bcc27 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -134,7 +134,6 @@ class OpenAITTSService(TTSService): ) super().__init__(sample_rate=sample_rate, **kwargs) - self.set_model_name(model) self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) if instructions or speed: @@ -154,6 +153,7 @@ class OpenAITTSService(TTSService): instructions=params.instructions if params else instructions, speed=params.speed if params else speed, ) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -194,7 +194,7 @@ class OpenAITTSService(TTSService): # Setup API parameters create_params = { "input": text, - "model": self.model_name, + "model": self._settings.model, "voice": VALID_VOICES[self._settings.voice], "response_format": "pcm", } diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index ef40dcb15..efac34223 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -160,12 +160,12 @@ class OpenAIRealtimeBetaLLMService(LLMService): self.api_key = api_key self.base_url = full_url - self.set_model_name(model) self._settings = OpenAIRealtimeBetaLLMSettings( model=model, session_properties=session_properties or events.SessionProperties(), ) + self._sync_model_name_to_metrics() self._audio_input_paused = start_audio_paused self._send_transcription_frames = send_transcription_frames self._websocket = None diff --git a/src/pipecat/services/openrouter/llm.py b/src/pipecat/services/openrouter/llm.py index a86b18573..c33fda2fc 100644 --- a/src/pipecat/services/openrouter/llm.py +++ b/src/pipecat/services/openrouter/llm.py @@ -72,8 +72,7 @@ class OpenRouterLLMService(OpenAILLMService): Transformed parameters ready for the API call. 
""" params = super().build_chat_completion_params(params_from_context) - model = getattr(self, "model_name", getattr(self, "model", "")).lower() - if "gemini" in model: + if "gemini" in self._settings.model.lower(): messages = params.get("messages", []) if not messages: return params diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index d2dd40a57..04f25621d 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -66,7 +66,7 @@ class PerplexityLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "messages": params_from_context["messages"], } diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index 630351164..26f1e2dad 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -204,7 +204,7 @@ class PlayHTTTSService(InterruptibleTTSService): speed=params.speed, seed=params.seed, ) - self.set_model_name(voice_engine) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -545,6 +545,7 @@ class PlayHTHttpTTSService(TTSService): voice_engine = voice_engine.replace("-ws", "") self._settings = PlayHTTTSSettings( + model=voice_engine, voice=voice_url, language=self.language_to_service_language(params.language) if params.language @@ -554,7 +555,7 @@ class PlayHTHttpTTSService(TTSService): speed=params.speed, seed=params.seed, ) - self.set_model_name(voice_engine) + self._sync_model_name_to_metrics() async def start(self, frame: StartFrame): """Start the PlayHT HTTP TTS service. 
diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 2f006efc7..d4cb045df 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -75,7 +75,6 @@ class RimeTTSSettings(TTSSettings): """Settings for Rime WS JSON and HTTP TTS services. Parameters: - modelId: Rime model identifier. audioFormat: Audio output format. samplingRate: Audio sample rate. lang: Rime language code. @@ -86,7 +85,6 @@ class RimeTTSSettings(TTSSettings): inlineSpeedAlpha: Inline speed control markup. """ - modelId: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) lang: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -104,7 +102,6 @@ class RimeNonJsonTTSSettings(TTSSettings): """Settings for Rime non-JSON WS TTS service. Parameters: - modelId: Rime model identifier. audioFormat: Audio output format. samplingRate: Audio sample rate. lang: Rime language code. @@ -114,7 +111,6 @@ class RimeNonJsonTTSSettings(TTSSettings): top_p: Cumulative probability threshold (0.0-1.0). 
""" - modelId: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) lang: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -210,7 +206,7 @@ class RimeTTSService(AudioContextWordTTSService): self._model = model self._settings = RimeTTSSettings( voice=voice_id, - modelId=model, + model=model, audioFormat="pcm", samplingRate=0, lang=self.language_to_service_language(params.language) if params.language else "eng", @@ -219,6 +215,7 @@ class RimeTTSService(AudioContextWordTTSService): pauseBetweenBrackets=json.dumps(params.pause_between_brackets), phonemizeBetweenBrackets=json.dumps(params.phonemize_between_brackets), ) + self._sync_model_name_to_metrics() # State tracking self._context_id = None # Tracks current turn @@ -353,7 +350,7 @@ class RimeTTSService(AudioContextWordTTSService): f"{k}={v}" for k, v in { "speaker": self._settings.voice, - "modelId": self._settings.modelId, + "modelId": self._settings.model, "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, "lang": self._settings.lang, @@ -589,6 +586,7 @@ class RimeHttpTTSService(TTSService): self._session = aiohttp_session self._base_url = "https://users.rime.ai/v1/rime-tts" self._settings = RimeTTSSettings( + model=model, lang=self.language_to_service_language(params.language) if params.language else "eng", speedAlpha=params.speed_alpha, reduceLatency=params.reduce_latency, @@ -597,7 +595,7 @@ class RimeHttpTTSService(TTSService): inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else NOT_GIVEN, voice=voice_id, ) - self.set_model_name(model) + self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -648,7 +646,7 @@ class RimeHttpTTSService(TTSService): payload["inlineSpeedAlpha"] = self._settings.inlineSpeedAlpha payload["text"] = text payload["speaker"] = self._settings.voice - payload["modelId"] = self._model_name + payload["modelId"] = self._settings.model payload["samplingRate"] = self.sample_rate # Arcana does not support PCM audio @@ -769,7 +767,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): self._model = model self._settings = RimeNonJsonTTSSettings( voice=voice_id, - modelId=model, + model=model, audioFormat=audio_format, samplingRate=sample_rate, lang=self.language_to_service_language(params.language) @@ -782,6 +780,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): temperature=params.temperature if params.temperature is not None else NOT_GIVEN, top_p=params.top_p if params.top_p is not None else NOT_GIVEN, ) + self._sync_model_name_to_metrics() # Add any extra parameters for future compatibility if params.extra: self._settings.extra.update(params.extra) @@ -863,7 +862,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): # Build URL with query parameters (only given, non-None values) settings_dict = { "speaker": self._settings.voice, - "modelId": self._settings.modelId, + "modelId": self._settings.model, "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, } @@ -981,10 +980,6 @@ class RimeNonJsonTTSService(InterruptibleTTSService): """ changed = await super()._update_settings(update) - # Sync model to settings dict field - if "model" in changed: - self._settings.modelId = self._model_name - if changed: logger.debug("Settings changed, reconnecting WebSocket with new parameters") await self._disconnect() diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 99c7bca2c..016e1740d 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -84,7 +84,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore 
Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, "temperature": self._settings.temperature, diff --git a/src/pipecat/services/sambanova/stt.py b/src/pipecat/services/sambanova/stt.py index a1cbe8a22..f313f0d7b 100644 --- a/src/pipecat/services/sambanova/stt.py +++ b/src/pipecat/services/sambanova/stt.py @@ -72,7 +72,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, + "model": self._settings.model, "response_format": "json", "language": self._language, } diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index aa6baef14..b2c9560e4 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -225,7 +225,6 @@ class SarvamSTTService(STTService): super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self.set_model_name(model) self._api_key = api_key # Store connection parameters @@ -257,6 +256,7 @@ class SarvamSTTService(STTService): vad_signals=params.vad_signals, high_vad_sensitivity=params.high_vad_sensitivity, ) + self._sync_model_name_to_metrics() if params.vad_signals: self._register_event_handler("on_speech_started") @@ -322,7 +322,7 @@ class SarvamSTTService(STTService): if is_given(update.language) and update.language is not None: if not self._config.supports_language: raise ValueError( - f"Model '{self.model_name}' does not support language parameter " + f"Model '{self._settings.model}' does not support language parameter " "(auto-detects language)." 
) @@ -330,11 +330,13 @@ class SarvamSTTService(STTService): if is_given(update.prompt) and update.prompt is not None: if not self._config.supports_prompt: raise ValueError( - f"Model '{self.model_name}' does not support prompt parameter." + f"Model '{self._settings.model}' does not support prompt parameter." ) if is_given(update.mode) and update.mode is not None: if not self._config.supports_mode: - raise ValueError(f"Model '{self.model_name}' does not support mode parameter.") + raise ValueError( + f"Model '{self._settings.model}' does not support mode parameter." + ) changed = await super()._update_settings(update) @@ -374,11 +376,13 @@ class SarvamSTTService(STTService): if not self._config.supports_prompt: if prompt is not None: - raise ValueError(f"Model '{self.model_name}' does not support prompt parameter.") + raise ValueError( + f"Model '{self._settings.model}' does not support prompt parameter." + ) # If prompt is None and model doesn't support prompts, silently return (no-op) return - logger.info(f"Updating {self.model_name} prompt.") + logger.info(f"Updating {self._settings.model} prompt.") self._settings.prompt = prompt await self._disconnect() await self._connect() @@ -460,7 +464,7 @@ class SarvamSTTService(STTService): try: # Build common connection parameters connect_kwargs = { - "model": self.model_name, + "model": self._settings.model, "sample_rate": str(self.sample_rate), } diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index cbccd0130..191689f5a 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -489,6 +489,7 @@ class SarvamHttpTTSService(TTSService): model=model, voice=voice_id, ) + self._sync_model_name_to_metrics() # Add parameters based on model support if self._config.supports_pitch: @@ -506,8 +507,6 @@ class SarvamHttpTTSService(TTSService): elif params.temperature != 0.6: logger.warning(f"temperature parameter is ignored for {model}") - 
self.set_model_name(model) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -559,7 +558,7 @@ class SarvamHttpTTSService(TTSService): "speaker": self._settings.voice, "sample_rate": self.sample_rate, "enable_preprocessing": self._settings.enable_preprocessing, - "model": self._model_name, + "model": self._settings.model, "pace": self._settings.pace if is_given(self._settings.pace) else 1.0, } @@ -828,7 +827,6 @@ class SarvamTTSService(InterruptibleTTSService): # WebSocket endpoint URL with model query parameter self._websocket_url = f"{url}?model={model}" self._api_key = api_key - self.set_model_name(model) # Validate and clamp pace to model's valid range pace = params.pace @@ -854,6 +852,7 @@ class SarvamTTSService(InterruptibleTTSService): model=model, voice=voice_id, ) + self._sync_model_name_to_metrics() # Add parameters based on model support if self._config.supports_pitch: diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 1f34c061c..1e4b49705 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -195,13 +195,13 @@ class SonioxSTTService(WebsocketSTTService): self._api_key = api_key self._url = url - self.set_model_name(params.model) self._vad_force_turn_endpoint = vad_force_turn_endpoint self._settings = SonioxSTTSettings( model=params.model, input_params=params, ) + self._sync_model_name_to_metrics() self._final_transcription_buffer = [] self._last_tokens_received: Optional[float] = None @@ -247,7 +247,7 @@ class SonioxSTTService(WebsocketSTTService): elif "input_params" in changed and self._settings.input_params.model is not None: # Only input_params was given → pull model up. self._settings.model = self._settings.input_params.model - self.set_model_name(self._settings.model) + self._sync_model_name_to_metrics() # TODO: someday we could reconnect here to apply updated settings. 
# Code might look something like the below: @@ -380,7 +380,7 @@ class SonioxSTTService(WebsocketSTTService): # Send the initial configuration message. config = { "api_key": self._api_key, - "model": self._model_name, + "model": self._settings.model, "audio_format": params.audio_format, "num_channels": params.num_channels or 1, "enable_endpoint_detection": enable_endpoint_detection, diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 2e23765b2..0ec5609b2 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -459,7 +459,7 @@ class SpeechmaticsSTTService(STTService): # Model + metrics (operating_point comes from the SDK config/preset) self._settings.model = self._config.operating_point.value - self.set_model_name(self._config.operating_point.value) + self._sync_model_name_to_metrics() # Message queue self._stt_msg_queue: asyncio.Queue[dict[str, Any]] = asyncio.Queue() diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 47b83ab56..de2539932 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -492,7 +492,7 @@ class STTService(AIService): if self.metrics_enabled: ttfb_data = TTFBMetricsData( processor=self.name, - model=self.model_name, + model=self._settings.model, value=ttfb, ) await super().push_frame(MetricsFrame(data=[ttfb_data])) diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 74ca2d102..9def3c2f1 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -156,7 +156,6 @@ class BaseWhisperSTTService(SegmentedSTTService): **kwargs: Additional arguments passed to SegmentedSTTService. 
""" super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self.set_model_name(model) self._client = self._create_client(api_key, base_url) self._language = self.language_to_service_language(language or Language.EN) self._prompt = prompt @@ -170,6 +169,7 @@ class BaseWhisperSTTService(SegmentedSTTService): prompt=self._prompt, temperature=self._temperature, ) + self._sync_model_name_to_metrics() def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index 033d815d9..205838314 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -236,7 +236,6 @@ class WhisperSTTService(SegmentedSTTService): super().__init__(**kwargs) self._device: str = device self._compute_type = compute_type - self.set_model_name(model if isinstance(model, str) else model.value) self._no_speech_prob = no_speech_prob self._model: Optional[WhisperModel] = None @@ -247,6 +246,7 @@ class WhisperSTTService(SegmentedSTTService): compute_type=self._compute_type, no_speech_prob=self._no_speech_prob, ) + self._sync_model_name_to_metrics() self._load() @@ -281,7 +281,7 @@ class WhisperSTTService(SegmentedSTTService): logger.debug("Loading Whisper model...") self._model = WhisperModel( - self.model_name, device=self._device, compute_type=self._compute_type + self._settings.model, device=self._device, compute_type=self._compute_type ) logger.debug("Loaded Whisper model") except ModuleNotFoundError as e: @@ -370,7 +370,6 @@ class WhisperSTTServiceMLX(WhisperSTTService): # Skip WhisperSTTService.__init__ and call its parent directly SegmentedSTTService.__init__(self, **kwargs) - self.set_model_name(model if isinstance(model, str) else model.value) self._no_speech_prob = no_speech_prob self._temperature = temperature @@ -381,6 +380,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): 
temperature=self._temperature, engine="mlx", ) + self._sync_model_name_to_metrics() # No need to call _load() as MLX Whisper loads models on demand @@ -421,7 +421,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): chunk = await asyncio.to_thread( mlx_whisper.transcribe, audio_float, - path_or_hf_repo=self.model_name, + path_or_hf_repo=self._settings.model, temperature=self._temperature, language=self._settings.language, ) diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 3b23f337a..296ff9522 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -44,6 +44,23 @@ T = TypeVar("T") R = TypeVar("R") +def _get_model_name(service) -> str: + """Get the model name from a service instance. + + This is a bit of a mess — there were multiple places a model name could live. + Soon, self._settings should be the only source of truth about model name. + In fact...it might already be the case, but juuuuust to be safe, we'll + check all the places we used to store it. + """ + return ( + getattr(getattr(service, "_settings", None), "model", None) + or getattr(service, "_full_model_name", None) + or getattr(service, "model_name", None) + or getattr(service, "_model_name", None) + or "unknown" + ) + + def _noop_decorator(func): """No-op fallback decorator when tracing is unavailable. 
@@ -194,7 +211,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - add_tts_span_attributes( span=span, service_name=service_class_name, - model=getattr(self, "model_name") or "unknown", + model=_get_model_name(self), voice_id=getattr(settings, "voice", "unknown"), text=text, settings=settings, @@ -311,7 +328,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) - add_stt_span_attributes( span=current_span, service_name=service_class_name, - model=getattr(self, "model_name") or settings.get("model", "unknown"), + model=_get_model_name(self), transcript=transcript, is_final=is_final, language=str(language) if language else None, @@ -491,10 +508,7 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - # Add all available attributes to the span attribute_kwargs = { "service_name": service_class_name, - "model": getattr(self, "_full_model_name", None) - or getattr(self, "model_name", None) - or params.get("model") - or "unknown", + "model": _get_model_name(self), "stream": True, # Most LLM services use streaming "parameters": params, } @@ -593,11 +607,7 @@ def traced_gemini_live(operation: str) -> Callable: ) as current_span: try: # Base service attributes - model_name = ( - getattr(self, "model_name", None) - or getattr(self, "_model_name", None) - or "unknown" - ) + model_name = _get_model_name(self) voice_id = getattr(self, "_voice_id", None) language_code = getattr(self, "_language_code", None) settings = getattr(self, "_settings", {}) @@ -900,11 +910,7 @@ def traced_openai_realtime(operation: str) -> Callable: ) as current_span: try: # Base service attributes - model_name = ( - getattr(self, "model_name", None) - or getattr(self, "_model_name", None) - or "unknown" - ) + model_name = _get_model_name(self) # Operation-specific attribute collection operation_attrs = {} From af4226adbff84c303b9a6a1da43d07d742ab84d2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Fri, 20 Feb 
2026 15:26:17 -0500 Subject: [PATCH 056/189] Add changelog entries for service settings refactor PR #3714 --- changelog/3714.changed.md | 1 + changelog/3714.deprecated.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog/3714.changed.md create mode 100644 changelog/3714.deprecated.md diff --git a/changelog/3714.changed.md b/changelog/3714.changed.md new file mode 100644 index 000000000..a3081a7c8 --- /dev/null +++ b/changelog/3714.changed.md @@ -0,0 +1 @@ +- ⚠️ Refactored service settings to use strongly-typed dataclasses (`TTSSettings`, `STTSettings`, `LLMSettings`, and service-specific subclasses) instead of plain dicts. Each service now exposes a `_settings` attribute with discoverable, typed fields. Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects. For service maintainers, see changes in COMMUNITY_INTEGRATIONS.md. diff --git a/changelog/3714.deprecated.md b/changelog/3714.deprecated.md new file mode 100644 index 000000000..ee71b2070 --- /dev/null +++ b/changelog/3714.deprecated.md @@ -0,0 +1 @@ +- Deprecated `set_model()`, `set_voice()`, and `set_language()` on AI services in favor of runtime updates via `TTSUpdateSettingsFrame`, `STTUpdateSettingsFrame`, and `LLMUpdateSettingsFrame`. From abb20f34ba8eb36fec9d96c22f183093b626da1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 20 Feb 2026 16:17:51 -0800 Subject: [PATCH 057/189] Update default Anthropic model to claude-sonnet-4-6 Update the default model in AnthropicLLMService and remove the now-unnecessary explicit model from the function calling example. 
--- examples/foundational/14a-function-calling-anthropic.py | 5 +---- src/pipecat/services/anthropic/llm.py | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/foundational/14a-function-calling-anthropic.py b/examples/foundational/14a-function-calling-anthropic.py index 165d4b220..36030bc2b 100644 --- a/examples/foundational/14a-function-calling-anthropic.py +++ b/examples/foundational/14a-function-calling-anthropic.py @@ -72,10 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = AnthropicLLMService( - api_key=os.getenv("ANTHROPIC_API_KEY"), - model="claude-3-7-sonnet-latest", - ) + llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY")) llm.register_function("get_weather", get_weather) llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index a21296fe3..e715c242d 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -184,7 +184,7 @@ class AnthropicLLMService(LLMService): self, *, api_key: str, - model: str = "claude-sonnet-4-5-20250929", + model: str = "claude-sonnet-4-6", params: Optional[InputParams] = None, client=None, retry_timeout_secs: Optional[float] = 5.0, @@ -195,7 +195,7 @@ class AnthropicLLMService(LLMService): Args: api_key: Anthropic API key for authentication. - model: Model name to use. Defaults to "claude-sonnet-4-5-20250929". + model: Model name to use. Defaults to "claude-sonnet-4-6". params: Optional model parameters for inference. client: Optional custom Anthropic client instance. retry_timeout_secs: Request timeout in seconds for retry logic. 
From 521f669051b29da4b312a87d07a59165513aa9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 20 Feb 2026 16:18:21 -0800 Subject: [PATCH 058/189] Add changelog entries for PR #3792 --- changelog/3792.changed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3792.changed.md diff --git a/changelog/3792.changed.md b/changelog/3792.changed.md new file mode 100644 index 000000000..ddf7fdc1e --- /dev/null +++ b/changelog/3792.changed.md @@ -0,0 +1 @@ +- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to `claude-sonnet-4-6`. From 18429f80f1c289e47efbe5fec54d4f8ce908a322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 20 Feb 2026 16:32:40 -0800 Subject: [PATCH 059/189] github(changelog): allow performance type --- .github/workflows/generate-changelog.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate-changelog.yml b/.github/workflows/generate-changelog.yml index 005eb94f1..496b3381c 100644 --- a/.github/workflows/generate-changelog.yml +++ b/.github/workflows/generate-changelog.yml @@ -86,7 +86,7 @@ jobs: fi # Validate fragment types - VALID_TYPES="added changed deprecated removed fixed security other" + VALID_TYPES="added changed deprecated removed fixed performance security other" INVALID_FRAGMENTS="" for file in changelog/*.md; do From 6d9c07b9458b8127ea9420250cbc956de977aab4 Mon Sep 17 00:00:00 2001 From: aconchillo <951761+aconchillo@users.noreply.github.com> Date: Sat, 21 Feb 2026 00:33:43 +0000 Subject: [PATCH 060/189] Update changelog for version 0.0.103 --- CHANGELOG.md | 209 +++++++++++++++++++++++++++++++++++ changelog/3615.fixed.md | 1 - changelog/3625.added.md | 1 - changelog/3642.added.md | 1 - changelog/3642.changed.md | 1 - changelog/3684.changed.md | 3 - changelog/3698.fixed.md | 1 - changelog/3706.changed.md | 1 - changelog/3713.fixed.md | 1 - changelog/3718.fixed.md | 1 - changelog/3719.added.2.md 
| 1 - changelog/3719.added.md | 1 - changelog/3719.changed.md | 1 - changelog/3720.fixed.md | 1 - changelog/3728.changed.md | 1 - changelog/3729.fixed.2.md | 1 - changelog/3729.fixed.md | 1 - changelog/3730.added.md | 1 - changelog/3730.changed.md | 1 - changelog/3732.changed.md | 3 - changelog/3733.deprecated.md | 1 - changelog/3735.fixed.md | 1 - changelog/3737.fixed.md | 1 - changelog/3744.fixed.md | 1 - changelog/3748.added.md | 1 - changelog/3748.changed.md | 1 - changelog/3761.changed.md | 1 - changelog/3765.changed.md | 1 - changelog/3768.fixed.md | 1 - changelog/3774.added.md | 1 - changelog/3774.fixed.md | 1 - changelog/3776.changed.md | 1 - changelog/3779.added.md | 1 - changelog/3782.fixed.md | 1 - changelog/3784.fixed.md | 1 - changelog/3785.added.md | 1 - changelog/3787.fixed.md | 1 - changelog/3789.fixed.md | 1 - changelog/3792.changed.md | 1 - 39 files changed, 209 insertions(+), 42 deletions(-) delete mode 100644 changelog/3615.fixed.md delete mode 100644 changelog/3625.added.md delete mode 100644 changelog/3642.added.md delete mode 100644 changelog/3642.changed.md delete mode 100644 changelog/3684.changed.md delete mode 100644 changelog/3698.fixed.md delete mode 100644 changelog/3706.changed.md delete mode 100644 changelog/3713.fixed.md delete mode 100644 changelog/3718.fixed.md delete mode 100644 changelog/3719.added.2.md delete mode 100644 changelog/3719.added.md delete mode 100644 changelog/3719.changed.md delete mode 100644 changelog/3720.fixed.md delete mode 100644 changelog/3728.changed.md delete mode 100644 changelog/3729.fixed.2.md delete mode 100644 changelog/3729.fixed.md delete mode 100644 changelog/3730.added.md delete mode 100644 changelog/3730.changed.md delete mode 100644 changelog/3732.changed.md delete mode 100644 changelog/3733.deprecated.md delete mode 100644 changelog/3735.fixed.md delete mode 100644 changelog/3737.fixed.md delete mode 100644 changelog/3744.fixed.md delete mode 100644 changelog/3748.added.md delete mode 100644 
changelog/3748.changed.md delete mode 100644 changelog/3761.changed.md delete mode 100644 changelog/3765.changed.md delete mode 100644 changelog/3768.fixed.md delete mode 100644 changelog/3774.added.md delete mode 100644 changelog/3774.fixed.md delete mode 100644 changelog/3776.changed.md delete mode 100644 changelog/3779.added.md delete mode 100644 changelog/3782.fixed.md delete mode 100644 changelog/3784.fixed.md delete mode 100644 changelog/3785.added.md delete mode 100644 changelog/3787.fixed.md delete mode 100644 changelog/3789.fixed.md delete mode 100644 changelog/3792.changed.md diff --git a/CHANGELOG.md b/CHANGELOG.md index ab41e8163..c917ec992 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,215 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +## [0.0.103] - 2026-02-20 + +### Added + +- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. This + allows timestamps info to trail audio chunks arrival, resulting in much + better first audio chunk latency + (PR [#3625](https://github.com/pipecat-ai/pipecat/pull/3625)) + +- Added model-specific `InputParams` to `RimeTTSService`: arcana params + (`repetition_penalty`, `temperature`, `top_p`) and mistv2 params + (`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param + changes now trigger WebSocket reconnection. + (PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642)) + +- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing + transport subclasses to handle custom frame types that flow through the audio + queue. + (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily + transport. These frames queue SIP transfer and SIP REFER operations with + audio, so the operation executes only after the bot finishes its current + utterance. 
+ (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Added keepalive support to `SarvamSTTService` to prevent idle connection + timeouts (e.g. when used behind a `ServiceSwitcher`). + (PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730)) + +- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection + at runtime by updating the timeout dynamically. + (PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748)) + +- Added `broadcast_sibling_id` field to the base `Frame` class. This field is + automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to + the ID of the paired frame pushed in the opposite direction, allowing + receivers to identify broadcast pairs. + (PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774)) + +- Added `ignored_sources` parameter to `RTVIObserverParams` and + `add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to + suppress RTVI messages from specific pipeline processors (e.g. a silent + evaluation LLM). + (PR [#3779](https://github.com/pipecat-ai/pipecat/pull/3779)) + +- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed + on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the + Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling, + and per-turn TTFB metrics. + (PR [#3785](https://github.com/pipecat-ai/pipecat/pull/3785)) + +### Changed + +- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the + `wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from + mistv2-specific values to `None` — only explicitly-set params are sent as + query params. + (PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642)) + +- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager` + in `aic_filter.py`. 
+ - Multiple filters using the same model path or `(model_id, + model_download_dir)` share one loaded model, with reference counting and + concurrent load deduplication. + - Model file I/O runs off the event loop so the filter does not block. + (PR [#3684](https://github.com/pipecat-ai/pipecat/pull/3684)) + +- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for + better traceability. + (PR [#3706](https://github.com/pipecat-ai/pipecat/pull/3706)) + +- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now + queued with audio like other transport frames. + (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow + 12.x. + (PR [#3728](https://github.com/pipecat-ai/pipecat/pull/3728)) + +- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService` + base class, allowing any STT service (not just websocket-based ones) to use + idle-connection keepalive via the `keepalive_timeout` and + `keepalive_interval` parameters. + (PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730)) + +- Improved audio context management in `AudioContextTTSService` by moving + context ID tracking to the base class and adding + `reuse_context_id_within_turn` parameter to control concurrent TTS request + handling. + - Added helper methods: `has_active_audio_context()`, + `get_active_audio_context_id()`, `remove_active_audio_context()`, + `reset_active_audio_context()` + - Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS + implementations by removing duplicate context management code + (PR [#3732](https://github.com/pipecat-ai/pipecat/pull/3732)) + +- `UserIdleController` is now always created with a default timeout of 0 + (disabled). The `user_idle_timeout` parameter changed from `Optional[float] = + None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and + `UserIdleController`. 
+ (PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748)) + +- Change the version specifier from `>=0.2.8` to `~=0.2.8` for the + `speechmatics-voice` package to ensure compatibility with future patch + versions. + (PR [#3761](https://github.com/pipecat-ai/pipecat/pull/3761)) + +- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC` + timestamp transport strategy by default + (PR [#3765](https://github.com/pipecat-ai/pipecat/pull/3765)) + +- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`, + `stop_ttfb_metrics()`, `start_processing_metrics()`, and + `stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`, + allowing custom timestamps for metrics measurement. `STTService` now uses + these instead of custom TTFB tracking. + (PR [#3776](https://github.com/pipecat-ai/pipecat/pull/3776)) + +- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to + `claude-sonnet-4-6`. + (PR [#3792](https://github.com/pipecat-ai/pipecat/pull/3792)) + +### Deprecated + +- Deprecated unused `Traceable`, `@traceable`, `@traced`, and + `AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module + will be removed in a future release. + (PR [#3733](https://github.com/pipecat-ai/pipecat/pull/3733)) + +### Fixed + +- Fixed race condition where `RTVIObserver` could send messages before + `DailyTransport` join completed. Outbound messages are now queued & delivered + after the transport is ready. + (PR [#3615](https://github.com/pipecat-ai/pipecat/pull/3615)) + +- Fixed async generator cleanup in OpenAI LLM streaming to prevent + `AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699). + (PR [#3698](https://github.com/pipecat-ai/pipecat/pull/3698)) + +- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all + sample rates, including 8kHz audio. 
+ (PR [#3713](https://github.com/pipecat-ai/pipecat/pull/3713)) + +- Fixed a race condition in `RTVIObserver` where bot output messages could be + sent before the bot-started-speaking event. + (PR [#3718](https://github.com/pipecat-ai/pipecat/pull/3718)) + +- Fixed Grok Realtime `session.updated` event parsing failure caused by the API + returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`). + (PR [#3720](https://github.com/pipecat-ai/pipecat/pull/3720)) + +- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`, + `RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and + `PlayHTTTSService`. Services now properly reuse the same context ID across + multiple `run_tts()` invocations within a single LLM turn, preventing context + tracking issues and incorrect lifecycle signaling. + (PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729)) + +- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when + processing multiple sentences within a single LLM turn. + (PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729)) + +- Fixed tracing service decorators executing the wrapped function twice when + the function itself raised an exception (e.g., LLM rate limit, TTS timeout). + (PR [#3735](https://github.com/pipecat-ai/pipecat/pull/3735)) + +- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame` + reaches downstream processors. + (PR [#3737](https://github.com/pipecat-ai/pipecat/pull/3737)) + +- Fixed `UserIdleController` false idle triggers caused by gaps between user + and bot activity frames. The idle timer now starts only after + `BotStoppedSpeakingFrame` and is suppressed during active user turns and + function calls. + (PR [#3744](https://github.com/pipecat-ai/pipecat/pull/3744)) + +- Fixed incorrect `sample_rate` assignment in + `TavusInputTransport._on_participant_audio_data` (was using + `audio.audio_frames` instead of `audio.sample_rate`). 
+ (PR [#3768](https://github.com/pipecat-ai/pipecat/pull/3768)) + +- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all + upstream frames were filtered out to avoid duplicate messages from + broadcasted frames. Now only upstream copies of broadcasted frames are + skipped. + (PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774)) + +- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that + could cause shared state across instances. + (PR [#3782](https://github.com/pipecat-ai/pipecat/pull/3782)) + +- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle + using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead + of `is_final and speech_final`) for final transcription detection, matching + `DeepgramSTTService` behavior. + (PR [#3784](https://github.com/pipecat-ai/pipecat/pull/3784)) + +- Fixed a race condition in `AudioContextTTSService` where the audio context + could time out between consecutive TTS requests within the same turn, causing + audio to be discarded. + (PR [#3787](https://github.com/pipecat-ai/pipecat/pull/3787)) + +- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the + `InterruptionFrame` does not reach the pipeline sink within the timeout. + Added a `timeout` keyword argument to customize the wait duration. + (PR [#3789](https://github.com/pipecat-ai/pipecat/pull/3789)) + ## [0.0.102] - 2026-02-10 ### Added diff --git a/changelog/3615.fixed.md b/changelog/3615.fixed.md deleted file mode 100644 index b14dfd70f..000000000 --- a/changelog/3615.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed race condition where `RTVIObserver` could send messages before `DailyTransport` join completed. Outbound messages are now queued & delivered after the transport is ready. 
diff --git a/changelog/3625.added.md b/changelog/3625.added.md deleted file mode 100644 index ddf787567..000000000 --- a/changelog/3625.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. This allows timestamps info to trail audio chunks arrival, resulting in much better first audio chunk latency diff --git a/changelog/3642.added.md b/changelog/3642.added.md deleted file mode 100644 index 47668bf59..000000000 --- a/changelog/3642.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added model-specific `InputParams` to `RimeTTSService`: arcana params (`repetition_penalty`, `temperature`, `top_p`) and mistv2 params (`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param changes now trigger WebSocket reconnection. diff --git a/changelog/3642.changed.md b/changelog/3642.changed.md deleted file mode 100644 index 96a43fbb8..000000000 --- a/changelog/3642.changed.md +++ /dev/null @@ -1 +0,0 @@ -- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the `wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from mistv2-specific values to `None` — only explicitly-set params are sent as query params. diff --git a/changelog/3684.changed.md b/changelog/3684.changed.md deleted file mode 100644 index 1bdb2c89c..000000000 --- a/changelog/3684.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager` in `aic_filter.py`. - - Multiple filters using the same model path or `(model_id, model_download_dir)` share one loaded model, with reference counting and concurrent load deduplication. - - Model file I/O runs off the event loop so the filter does not block. 
diff --git a/changelog/3698.fixed.md b/changelog/3698.fixed.md deleted file mode 100644 index c040e9efb..000000000 --- a/changelog/3698.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed async generator cleanup in OpenAI LLM streaming to prevent `AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699). diff --git a/changelog/3706.changed.md b/changelog/3706.changed.md deleted file mode 100644 index 0c9876bdc..000000000 --- a/changelog/3706.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for better traceability. diff --git a/changelog/3713.fixed.md b/changelog/3713.fixed.md deleted file mode 100644 index 241f0e56a..000000000 --- a/changelog/3713.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all sample rates, including 8kHz audio. diff --git a/changelog/3718.fixed.md b/changelog/3718.fixed.md deleted file mode 100644 index 68e1d2682..000000000 --- a/changelog/3718.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed a race condition in `RTVIObserver` where bot output messages could be sent before the bot-started-speaking event. diff --git a/changelog/3719.added.2.md b/changelog/3719.added.2.md deleted file mode 100644 index 77d8956d7..000000000 --- a/changelog/3719.added.2.md +++ /dev/null @@ -1 +0,0 @@ -- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing transport subclasses to handle custom frame types that flow through the audio queue. diff --git a/changelog/3719.added.md b/changelog/3719.added.md deleted file mode 100644 index bc1c2d6b1..000000000 --- a/changelog/3719.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily transport. These frames queue SIP transfer and SIP REFER operations with audio, so the operation executes only after the bot finishes its current utterance. 
diff --git a/changelog/3719.changed.md b/changelog/3719.changed.md deleted file mode 100644 index f42d0303b..000000000 --- a/changelog/3719.changed.md +++ /dev/null @@ -1 +0,0 @@ -- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now queued with audio like other transport frames. diff --git a/changelog/3720.fixed.md b/changelog/3720.fixed.md deleted file mode 100644 index c3cb69d34..000000000 --- a/changelog/3720.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed Grok Realtime `session.updated` event parsing failure caused by the API returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`). diff --git a/changelog/3728.changed.md b/changelog/3728.changed.md deleted file mode 100644 index bc5ccc74d..000000000 --- a/changelog/3728.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow 12.x. diff --git a/changelog/3729.fixed.2.md b/changelog/3729.fixed.2.md deleted file mode 100644 index 6d4f33d93..000000000 --- a/changelog/3729.fixed.2.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`, `RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and `PlayHTTTSService`. Services now properly reuse the same context ID across multiple `run_tts()` invocations within a single LLM turn, preventing context tracking issues and incorrect lifecycle signaling. diff --git a/changelog/3729.fixed.md b/changelog/3729.fixed.md deleted file mode 100644 index b8be759fb..000000000 --- a/changelog/3729.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when processing multiple sentences within a single LLM turn. diff --git a/changelog/3730.added.md b/changelog/3730.added.md deleted file mode 100644 index e3ac64278..000000000 --- a/changelog/3730.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added keepalive support to `SarvamSTTService` to prevent idle connection timeouts (e.g. 
when used behind a `ServiceSwitcher`). diff --git a/changelog/3730.changed.md b/changelog/3730.changed.md deleted file mode 100644 index 697bc863c..000000000 --- a/changelog/3730.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService` base class, allowing any STT service (not just websocket-based ones) to use idle-connection keepalive via the `keepalive_timeout` and `keepalive_interval` parameters. diff --git a/changelog/3732.changed.md b/changelog/3732.changed.md deleted file mode 100644 index 22681cf04..000000000 --- a/changelog/3732.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- Improved audio context management in `AudioContextTTSService` by moving context ID tracking to the base class and adding `reuse_context_id_within_turn` parameter to control concurrent TTS request handling. - - Added helper methods: `has_active_audio_context()`, `get_active_audio_context_id()`, `remove_active_audio_context()`, `reset_active_audio_context()` - - Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS implementations by removing duplicate context management code diff --git a/changelog/3733.deprecated.md b/changelog/3733.deprecated.md deleted file mode 100644 index 8b1fb29bb..000000000 --- a/changelog/3733.deprecated.md +++ /dev/null @@ -1 +0,0 @@ -- Deprecated unused `Traceable`, `@traceable`, `@traced`, and `AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module will be removed in a future release. diff --git a/changelog/3735.fixed.md b/changelog/3735.fixed.md deleted file mode 100644 index 02de936c7..000000000 --- a/changelog/3735.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed tracing service decorators executing the wrapped function twice when the function itself raised an exception (e.g., LLM rate limit, TTS timeout). 
diff --git a/changelog/3737.fixed.md b/changelog/3737.fixed.md deleted file mode 100644 index 6dee96f82..000000000 --- a/changelog/3737.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame` reaches downstream processors. diff --git a/changelog/3744.fixed.md b/changelog/3744.fixed.md deleted file mode 100644 index d2b3f665f..000000000 --- a/changelog/3744.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `UserIdleController` false idle triggers caused by gaps between user and bot activity frames. The idle timer now starts only after `BotStoppedSpeakingFrame` and is suppressed during active user turns and function calls. diff --git a/changelog/3748.added.md b/changelog/3748.added.md deleted file mode 100644 index 223f8bf4b..000000000 --- a/changelog/3748.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection at runtime by updating the timeout dynamically. diff --git a/changelog/3748.changed.md b/changelog/3748.changed.md deleted file mode 100644 index 61be61c6b..000000000 --- a/changelog/3748.changed.md +++ /dev/null @@ -1 +0,0 @@ -- `UserIdleController` is now always created with a default timeout of 0 (disabled). The `user_idle_timeout` parameter changed from `Optional[float] = None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and `UserIdleController`. diff --git a/changelog/3761.changed.md b/changelog/3761.changed.md deleted file mode 100644 index 71618502c..000000000 --- a/changelog/3761.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Change the version specifier from `>=0.2.8` to `~=0.2.8` for the `speechmatics-voice` package to ensure compatibility with future patch versions. 
diff --git a/changelog/3765.changed.md b/changelog/3765.changed.md deleted file mode 100644 index 5d3e758d5..000000000 --- a/changelog/3765.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC` timestamp transport strategy by default diff --git a/changelog/3768.fixed.md b/changelog/3768.fixed.md deleted file mode 100644 index 4c8d6438e..000000000 --- a/changelog/3768.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed incorrect `sample_rate` assignment in `TavusInputTransport._on_participant_audio_data` (was using `audio.audio_frames` instead of `audio.sample_rate`). diff --git a/changelog/3774.added.md b/changelog/3774.added.md deleted file mode 100644 index e72599e60..000000000 --- a/changelog/3774.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `broadcast_sibling_id` field to the base `Frame` class. This field is automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to the ID of the paired frame pushed in the opposite direction, allowing receivers to identify broadcast pairs. diff --git a/changelog/3774.fixed.md b/changelog/3774.fixed.md deleted file mode 100644 index a839f56ed..000000000 --- a/changelog/3774.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all upstream frames were filtered out to avoid duplicate messages from broadcasted frames. Now only upstream copies of broadcasted frames are skipped. diff --git a/changelog/3776.changed.md b/changelog/3776.changed.md deleted file mode 100644 index 87b5d6128..000000000 --- a/changelog/3776.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`, `stop_ttfb_metrics()`, `start_processing_metrics()`, and `stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`, allowing custom timestamps for metrics measurement. `STTService` now uses these instead of custom TTFB tracking. 
diff --git a/changelog/3779.added.md b/changelog/3779.added.md deleted file mode 100644 index 8800cfc04..000000000 --- a/changelog/3779.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `ignored_sources` parameter to `RTVIObserverParams` and `add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to suppress RTVI messages from specific pipeline processors (e.g. a silent evaluation LLM). diff --git a/changelog/3782.fixed.md b/changelog/3782.fixed.md deleted file mode 100644 index 7d21fdeab..000000000 --- a/changelog/3782.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that could cause shared state across instances. diff --git a/changelog/3784.fixed.md b/changelog/3784.fixed.md deleted file mode 100644 index e88431f16..000000000 --- a/changelog/3784.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead of `is_final and speech_final`) for final transcription detection, matching `DeepgramSTTService` behavior. diff --git a/changelog/3785.added.md b/changelog/3785.added.md deleted file mode 100644 index 90a4172d4..000000000 --- a/changelog/3785.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling, and per-turn TTFB metrics. diff --git a/changelog/3787.fixed.md b/changelog/3787.fixed.md deleted file mode 100644 index ff11ada71..000000000 --- a/changelog/3787.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed a race condition in `AudioContextTTSService` where the audio context could time out between consecutive TTS requests within the same turn, causing audio to be discarded. 
diff --git a/changelog/3789.fixed.md b/changelog/3789.fixed.md deleted file mode 100644 index 1bf2be1a3..000000000 --- a/changelog/3789.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the `InterruptionFrame` does not reach the pipeline sink within the timeout. Added a `timeout` keyword argument to customize the wait duration. diff --git a/changelog/3792.changed.md b/changelog/3792.changed.md deleted file mode 100644 index ddf7fdc1e..000000000 --- a/changelog/3792.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to `claude-sonnet-4-6`. From f49658de15199f63b803c8ed50ceaef7aa172dc7 Mon Sep 17 00:00:00 2001 From: Om Chauhan Date: Sat, 21 Feb 2026 17:06:54 +0530 Subject: [PATCH 061/189] skipping provider-specific messages during summarization --- .../context/llm_context_summarization.py | 18 ++++- tests/test_context_summarization.py | 74 ++++++++++++++++++- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 6865a00d9..06551e3bb 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -15,7 +15,7 @@ from typing import List, Optional from loguru import logger -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage # Token estimation constants CHARS_PER_TOKEN = 4 # Industry-standard heuristic: 1 token ≈ 4 characters @@ -188,6 +188,9 @@ class LLMContextSummarizationUtil: total = 0 for message in context.messages: + if isinstance(message, LLMSpecificMessage): + continue + # Role and structure overhead total += TOKEN_OVERHEAD_PER_MESSAGE @@ -248,6 +251,9 @@ class LLMContextSummarizationUtil: for i in range(start_idx, len(messages)): msg = messages[i] + if 
isinstance(msg, LLMSpecificMessage): + continue + role = msg.get("role") # Check for tool calls in assistant messages @@ -298,7 +304,12 @@ class LLMContextSummarizationUtil: # Find first system message index first_system_index = next( - (i for i, msg in enumerate(messages) if msg.get("role") == "system"), -1 + ( + i + for i, msg in enumerate(messages) + if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" + ), + -1, ) # Messages to summarize are between first system and recent messages @@ -356,6 +367,9 @@ class LLMContextSummarizationUtil: transcript_parts = [] for msg in messages: + if isinstance(msg, LLMSpecificMessage): + continue + role = msg.get("role", "unknown") content = msg.get("content", "") diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 87aaa74d3..36559ed3f 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -10,7 +10,7 @@ import unittest from unittest.mock import AsyncMock, MagicMock, patch from pipecat.frames.frames import LLMContextSummaryRequestFrame -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.services.llm_service import LLMService from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationConfig, @@ -602,5 +602,77 @@ class TestSummaryGenerationExceptions(unittest.IsolatedAsyncioTestCase): self.assertEqual(last_index, 1) # Should be the index of the last summarized message +class TestLLMSpecificMessageHandling(unittest.TestCase): + """Tests that LLMSpecificMessage objects are correctly skipped in summarization.""" + + def test_estimate_context_tokens_skips_specific_messages(self): + """Test that estimate_context_tokens skips LLMSpecificMessage objects.""" + context = LLMContext() + context.add_message({"role": "user", "content": "Hello"}) + context.add_message(LLMSpecificMessage(llm="google", 
message={})) + context.add_message({"role": "assistant", "content": "Hi there"}) + + tokens_with_specific = LLMContextSummarizationUtil.estimate_context_tokens(context) + + context_without = LLMContext() + context_without.add_message({"role": "user", "content": "Hello"}) + context_without.add_message({"role": "assistant", "content": "Hi there"}) + tokens_without = LLMContextSummarizationUtil.estimate_context_tokens(context_without) + + self.assertEqual(tokens_with_specific, tokens_without) + + def test_get_messages_to_summarize_with_specific_messages(self): + """Test that get_messages_to_summarize handles LLMSpecificMessage objects.""" + context = LLMContext() + context.add_message({"role": "system", "content": "System prompt"}) + context.add_message(LLMSpecificMessage(llm="google", message={})) + context.add_message({"role": "user", "content": "Message 1"}) + context.add_message({"role": "assistant", "content": "Response 1"}) + context.add_message(LLMSpecificMessage(llm="google", message={})) + context.add_message({"role": "user", "content": "Message 2"}) + context.add_message({"role": "assistant", "content": "Response 2"}) + + result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2) + + self.assertGreater(len(result.messages), 0) + self.assertGreater(result.last_summarized_index, 0) + + def test_format_messages_skips_specific_messages(self): + """Test that format_messages_for_summary skips LLMSpecificMessage objects.""" + messages = [ + {"role": "user", "content": "Hello"}, + LLMSpecificMessage(llm="google", message={}), + {"role": "assistant", "content": "Hi there"}, + ] + + transcript = LLMContextSummarizationUtil.format_messages_for_summary(messages) + + self.assertIn("USER: Hello", transcript) + self.assertIn("ASSISTANT: Hi there", transcript) + + def test_function_call_tracking_skips_specific_messages(self): + """Test that _get_function_calls_in_progress_index skips LLMSpecificMessage.""" + messages = [ + {"role": "user", "content": "What 
time is it?"}, + LLMSpecificMessage(llm="google", message={}), + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": {"name": "get_time", "arguments": "{}"}, + } + ], + }, + LLMSpecificMessage(llm="google", message={}), + {"role": "tool", "tool_call_id": "call_123", "content": '{"time": "10:30 AM"}'}, + ] + + result = LLMContextSummarizationUtil._get_function_calls_in_progress_index(messages, 0) + self.assertEqual(result, -1) + + if __name__ == "__main__": unittest.main() From 9476b5d184751ddfaff76b478711fc22ba835401 Mon Sep 17 00:00:00 2001 From: Om Chauhan Date: Sat, 21 Feb 2026 17:35:08 +0530 Subject: [PATCH 062/189] added changelog --- changelog/3794.fixed.md | 1 + tests/test_context_summarization.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog/3794.fixed.md diff --git a/changelog/3794.fixed.md b/changelog/3794.fixed.md new file mode 100644 index 000000000..e2b3c7c00 --- /dev/null +++ b/changelog/3794.fixed.md @@ -0,0 +1 @@ +- Added `LLMSpecificMessage` handling in `LLMContextSummarizationUtil` to skip provider-specific messages during context summarization. 
diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 36559ed3f..3bb1246e9 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -634,8 +634,8 @@ class TestLLMSpecificMessageHandling(unittest.TestCase): result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2) - self.assertGreater(len(result.messages), 0) - self.assertGreater(result.last_summarized_index, 0) + self.assertEqual(len(result.messages), 4) + self.assertEqual(result.last_summarized_index, 4) def test_format_messages_skips_specific_messages(self): """Test that format_messages_for_summary skips LLMSpecificMessage objects.""" From a18aa738e0f792c56e22574a15e30c9a6d8917a1 Mon Sep 17 00:00:00 2001 From: Om Chauhan Date: Sat, 21 Feb 2026 18:26:31 +0530 Subject: [PATCH 063/189] fix(realtime): handle response_cancel_not_active as non-fatal --- src/pipecat/services/grok/realtime/llm.py | 7 +++++-- src/pipecat/services/openai/realtime/llm.py | 9 ++++++--- src/pipecat/services/openai_realtime_beta/openai.py | 9 ++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index e1355ce31..0d3687a26 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -511,8 +511,11 @@ class GrokRealtimeLLMService(LLMService): elif evt.type == "response.function_call_arguments.done": await self._handle_evt_function_call_arguments_done(evt) elif evt.type == "error": - await self._handle_evt_error(evt) - return + if evt.error.code == "response_cancel_not_active": + logger.warning(f"Non-fatal API error: {evt.error.message}") + else: + await self._handle_evt_error(evt) + return async def _handle_evt_conversation_created(self, evt): """Handle conversation.created event - first event after connecting.""" diff --git a/src/pipecat/services/openai/realtime/llm.py 
b/src/pipecat/services/openai/realtime/llm.py index cf249408c..ebd1fbdbc 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -577,9 +577,12 @@ class OpenAIRealtimeLLMService(LLMService): await self._handle_evt_function_call_arguments_done(evt) elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): - await self._handle_evt_error(evt) - # errors are fatal, so exit the receive loop - return + if evt.error.code == "response_cancel_not_active": + logger.warning(f"Non-fatal API error: {evt.error.message}") + else: + await self._handle_evt_error(evt) + # errors are fatal, so exit the receive loop + return @traced_openai_realtime(operation="llm_setup") async def _handle_evt_session_created(self, evt): diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 1199d8556..808fbb053 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -503,9 +503,12 @@ class OpenAIRealtimeBetaLLMService(LLMService): await self._handle_evt_audio_transcript_delta(evt) elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): - await self._handle_evt_error(evt) - # errors are fatal, so exit the receive loop - return + if evt.error.code == "response_cancel_not_active": + logger.warning(f"Non-fatal API error: {evt.error.message}") + else: + await self._handle_evt_error(evt) + # errors are fatal, so exit the receive loop + return @traced_openai_realtime(operation="llm_setup") async def _handle_evt_session_created(self, evt): From b390dc369c67b93fb04e7797069ed2a7402334bc Mon Sep 17 00:00:00 2001 From: Om Chauhan Date: Sat, 21 Feb 2026 18:33:29 +0530 Subject: [PATCH 064/189] added changelog --- changelog/3795.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3795.fixed.md diff --git 
a/changelog/3795.fixed.md b/changelog/3795.fixed.md new file mode 100644 index 000000000..8c231abac --- /dev/null +++ b/changelog/3795.fixed.md @@ -0,0 +1 @@ +- Treated `response_cancel_not_active` as a non-fatal error in realtime services (`OpenAIRealtimeLLMService`, `GrokRealtimeLLMService`, `OpenAIRealtimeBetaLLMService`) to prevent WebSocket disconnection when cancelling an inactive response. \ No newline at end of file From 6a3718d33d5ab82d3ffe893eaee03dccef51e889 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 23 Feb 2026 08:52:12 -0500 Subject: [PATCH 065/189] Inline local-smart-turn-v3 deps for Poetry compatibility Replace self-referential `pipecat-ai[local-smart-turn-v3]` extra in core dependencies with the actual packages (`transformers`, `onnxruntime`). Self-referential extras are not supported by Poetry and cause dependency resolution failures. Since these are required by the default turn stop strategy (LocalSmartTurnAnalyzerV3), they belong in core dependencies. - Remove `local-smart-turn-v3` optional extra from pyproject.toml - Remove try/except ModuleNotFoundError guard (now always installed) - Remove `--extra local-smart-turn-v3` from CI workflows --- .github/workflows/coverage.yaml | 1 - .github/workflows/tests.yaml | 1 - changelog/3803.fixed.md | 1 + changelog/3803.removed.md | 1 + pyproject.toml | 7 ++++--- .../turn/smart_turn/local_smart_turn_v3.py | 12 ++---------- uv.lock | 17 +++++++++-------- 7 files changed, 17 insertions(+), 23 deletions(-) create mode 100644 changelog/3803.fixed.md create mode 100644 changelog/3803.removed.md diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index df9c388bf..d65841a7d 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -40,7 +40,6 @@ jobs: --extra google \ --extra langchain \ --extra livekit \ - --extra local-smart-turn-v3 \ --extra piper \ --extra tracing \ --extra websocket diff --git a/.github/workflows/tests.yaml 
b/.github/workflows/tests.yaml index 5941448f3..a36a2fbd0 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -44,7 +44,6 @@ jobs: --extra google \ --extra langchain \ --extra livekit \ - --extra local-smart-turn-v3 \ --extra piper \ --extra tracing \ --extra websocket diff --git a/changelog/3803.fixed.md b/changelog/3803.fixed.md new file mode 100644 index 000000000..73d7c3f19 --- /dev/null +++ b/changelog/3803.fixed.md @@ -0,0 +1 @@ +- Fixed Poetry compatibility by inlining `local-smart-turn-v3` dependencies (`transformers`, `onnxruntime`) into core dependencies instead of using a self-referential extra. diff --git a/changelog/3803.removed.md b/changelog/3803.removed.md new file mode 100644 index 000000000..867c3cfcc --- /dev/null +++ b/changelog/3803.removed.md @@ -0,0 +1 @@ +- Removed `local-smart-turn-v3` optional extra from `pyproject.toml`. The `transformers` and `onnxruntime` packages are now always installed as core dependencies since they are required by the default turn stop strategy, `TurnAnalyzerUserTurnStopStrategy`, which uses `LocalSmartTurnAnalyzerV3`. diff --git a/pyproject.toml b/pyproject.toml index db76fa24e..63ff02c06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,10 @@ dependencies = [ # Pinning numba to resolve package dependencies "numba==0.61.2", "wait_for2>=0.4.1; python_version<'3.12'", - # Pipecat optionals - "pipecat-ai[local-smart-turn-v3]", + # Required by LocalSmartTurnAnalyzerV3 + # Inlined here instead of using a self-referential extra for Poetry compatibility. 
+ "transformers", + "onnxruntime~=1.23.2", ] [project.urls] @@ -84,7 +86,6 @@ livekit = [ "livekit~=1.0.13", "livekit-api~=1.0.5", "tenacity>=8.2.3,<10.0.0", lmnt = [ "pipecat-ai[websockets-base]" ] local = [ "pyaudio~=0.2.14" ] local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ] -local-smart-turn-v3 = [ "transformers", "onnxruntime~=1.23.2" ] mcp = [ "mcp[cli]>=1.11.0,<2" ] mem0 = [ "mem0ai~=0.1.94" ] mistral = [] diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index 1eae7cc02..b9e2a7663 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -13,21 +13,13 @@ local end-of-turn detection without requiring network connectivity. from typing import Any, Dict, Optional import numpy as np +import onnxruntime as ort from loguru import logger +from transformers import WhisperFeatureExtractor from pipecat.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn from pipecat.utils.env import env_truthy -try: - import onnxruntime as ort - from transformers import WhisperFeatureExtractor -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error( - "In order to use LocalSmartTurnAnalyzerV3, you need to `pip install pipecat-ai[local-smart-turn-v3]`." - ) - raise Exception(f"Missing module: {e}") - class LocalSmartTurnAnalyzerV3(BaseSmartTurn): """Local turn analyzer using the smart-turn-v3 ONNX model. 
diff --git a/uv.lock b/uv.lock index 06563ab45..c7adf5406 100644 --- a/uv.lock +++ b/uv.lock @@ -2111,6 +2111,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, + { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -2118,6 +2119,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -2126,6 +2128,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -2134,6 +2137,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -2142,6 +2146,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -2150,6 +2155,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = 
"sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, + { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -4509,10 +4515,6 @@ local-smart-turn = [ { name = "torchaudio" }, { name = "transformers" }, ] -local-smart-turn-v3 = [ - { name = "onnxruntime" }, - { name = "transformers" }, -] mcp = [ { name = "mcp", extra = ["cli"] }, ] @@ -4695,7 +4697,7 @@ requires-dist = [ { name = "numba", specifier = "==0.61.2" }, { name = "numpy", specifier = ">=1.26.4,<3" }, { name = "nvidia-riva-client", marker = "extra == 'nvidia'", specifier = "~=2.21.1" }, - { name = "onnxruntime", marker = "extra == 'local-smart-turn-v3'", specifier = "~=1.23.2" }, + { name = "onnxruntime", specifier = "~=1.23.2" }, { name = "onnxruntime", marker = "extra == 'silero'", specifier = "~=1.23.2" }, { name = "openai", specifier = ">=1.74.0,<3" }, { name = "opencv-python", marker = "extra == 'webrtc'", specifier = ">=4.11.0.86,<5" }, @@ -4705,7 +4707,6 @@ requires-dist = [ { name = "opentelemetry-sdk", marker = "extra == 'tracing'", specifier = ">=1.33.0" }, { name = "ormsgpack", marker = "extra == 'fish'", specifier = "~=1.7.0" }, { name = "pillow", specifier = ">=11.1.0,<13" }, - { name = "pipecat-ai", extras = ["local-smart-turn-v3"] }, { name = "pipecat-ai", extras = ["nvidia"], marker = "extra == 'riva'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'assemblyai'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'asyncai'" }, @@ -4755,14 +4756,14 @@ requires-dist = [ { name = "timm", marker = "extra == 'moondream'", specifier = "~=1.0.13" }, { name = "torch", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" }, { name = "torchaudio", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" }, + { name = "transformers" }, { name = "transformers", marker = "extra == 'local-smart-turn'" }, - { name = "transformers", marker = "extra == 'local-smart-turn-v3'" }, { name = "transformers", marker 
= "extra == 'moondream'", specifier = ">=4.48.0" }, { name = "uvicorn", marker = "extra == 'runner'", specifier = ">=0.32.0,<1.0.0" }, { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", 
"websockets-base", "whisper"] [package.metadata.requires-dev] dev = [ From 029f3dbefb171d087514285b3cb4a569487f9093 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 12:08:13 -0500 Subject: [PATCH 066/189] Updating 55o ElevenLabsTTSService example to also exercise switching voices, which requires reconnect --- .../foundational/55o-update-settings-elevenlabs-tts.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py index 4186f07ae..3fefa1ffb 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -104,6 +104,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating ElevenLabs TTS settings: speed=0.7") await task.queue_frame(TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=0.7))) + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: switching to a different voice") + await task.queue_frame( + TTSUpdateSettingsFrame( + update=ElevenLabsTTSSettings(voice=os.getenv("ELEVENLABS_VOICE_ID_ALT")) + ) + ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") From c527e1f30f090365850344acdafbd5bb3277f110 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 23 Feb 2026 14:26:16 -0500 Subject: [PATCH 067/189] Add dataclass vs Pydantic BaseModel rule to CLAUDE.md Document the existing convention: use @dataclass for frames and internal pipeline data, use Pydantic BaseModel for configuration, parameters, metrics, and external API data. 
--- CLAUDE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 7b79fa168..6886fc1ed 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -107,6 +107,9 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**: - **Docstrings**: Google-style. Classes describe purpose; `__init__` has `Args:` section; dataclasses use `Parameters:` section. - **Linting**: Ruff (line length 100). Pre-commit hooks enforce formatting. - **Type hints**: Required for complex async code. +- **Dataclass vs Pydantic**: Use `@dataclass` for frames and internal pipeline data (high-frequency, no validation needed). Use Pydantic `BaseModel` for configuration, parameters, metrics, and external API data (benefits from validation and serialization). Specifically: + - `@dataclass`: Frame types, context aggregator pairs, internal data containers + - `BaseModel`: Service `InputParams`, transport/VAD/turn params, metrics data, API request/response models, serializer params ### Docstring Example From 30db5fea7caa1dd7a5a501cca4dff0e3f4ec7265 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 15:38:57 -0500 Subject: [PATCH 068/189] Clarify that ServiceSettings and subclasses represent runtime-updatable settings Update docstrings for ServiceSettings, LLMSettings, TTSSettings, and STTSettings to make clear these capture only the subset of service configuration that can be changed while the pipeline is running via UpdateSettingsFrame, not all constructor parameters. --- src/pipecat/services/settings.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 0721cf3cd..1033305c1 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -92,7 +92,13 @@ _S = TypeVar("_S", bound="ServiceSettings") @dataclass class ServiceSettings: - """Base class for service settings. 
+ """Base class for runtime-updatable service settings. + + These settings represent the subset of a service's configuration that can + be changed **while the pipeline is running** (e.g. switching the model or + changing the voice). They are *not* meant to capture every constructor + parameter — only those that support live updates via + ``UpdateSettingsFrame``. Every AI service type (LLM, TTS, STT) extends this with its own fields. Fields default to ``NOT_GIVEN`` so that an instance can represent either @@ -244,7 +250,9 @@ class ServiceSettings: @dataclass class LLMSettings(ServiceSettings): - """Settings for LLM services. + """Runtime-updatable settings for LLM services. + + See ``ServiceSettings`` for the general concept. Parameters: model: LLM model identifier. @@ -279,7 +287,9 @@ class LLMSettings(ServiceSettings): @dataclass class TTSSettings(ServiceSettings): - """Settings for TTS services. + """Runtime-updatable settings for TTS services. + + See ``ServiceSettings`` for the general concept. Parameters: model: TTS model identifier. @@ -302,7 +312,9 @@ class TTSSettings(ServiceSettings): @dataclass class STTSettings(ServiceSettings): - """Settings for STT services. + """Runtime-updatable settings for STT services. + + See ``ServiceSettings`` for the general concept. Parameters: model: STT model identifier. From e804060e170c8979512475e7deacca113f59da81 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 15:45:00 -0500 Subject: [PATCH 069/189] Update COMMUNITY_INTEGRATIONS.md _update_settings examples Simplify the reconnect example to show a common pattern (reconnect on any change) and improve the _warn_unhandled_updated_settings example to show selective handling of specific fields. 
--- COMMUNITY_INTEGRATIONS.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index 32fbff333..642754451 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -266,17 +266,18 @@ class MySTTService(STTService): self._sync_model_name_to_metrics() ``` -To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields: +To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like: ```python async def _update_settings(self, update: STTSettings) -> dict[str, Any]: """Apply a settings update, reconfiguring the recognizer if needed.""" changed = await super()._update_settings(update) - if "language" in changed: - # Restart the recognizer with the new language. - await self._disconnect() - await self._connect() + if not changed: + return changed + + await self._disconnect() + await self._connect() return changed ``` @@ -285,7 +286,7 @@ The dict keys work like a set for membership tests (`"language" in changed`) and Note that, in this example, the service requires a reconnect to apply the new language. Consider, for each setting, whether your service requires reconnection or can apply changes in-place. 
-If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with the unhandled field names so users get a clear log message: +If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with any unhandled field names so users get a clear log message: ```python async def _update_settings(self, update: STTSettings) -> dict[str, Any]: @@ -294,8 +295,11 @@ async def _update_settings(self, update: STTSettings) -> dict[str, Any]: if not changed: return changed - # TODO: someday we could reconnect here to apply updated settings. - self._warn_unhandled_updated_settings(changed) + if "language" in changed: + await self._update_language() + else: + # TODO: handle changes to other settings soon! + self._warn_unhandled_updated_settings(changed.keys() - {"language"}) return changed ``` From ff174dd1c2bda105e683dfc042b6e22d645d8c91 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 15:58:39 -0500 Subject: [PATCH 070/189] Fix STT/TTS Deepgram Sagemaker 55-series examples (examples updating settings at runtime) --- .../55a-update-settings-deepgram-sagemaker-stt.py | 4 ++-- .../55q-update-settings-deepgram-sagemaker-tts.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py index dc8576261..8e45b5f2a 100644 --- a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -55,8 +55,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") stt = DeepgramSageMakerSTTService( - endpoint_name=os.getenv("SAGEMAKER_ENDPOINT_NAME", "my-deepgram-stt-endpoint"), - region=os.getenv("AWS_REGION", "us-east-2"), + endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"), + 
region=os.getenv("AWS_REGION"), ) tts = CartesiaTTSService( diff --git a/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py index 2db7af7fe..35fb7cebe 100644 --- a/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py @@ -56,8 +56,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = DeepgramSageMakerTTSService( - endpoint_name=os.getenv("SAGEMAKER_ENDPOINT_NAME", "my-deepgram-tts-endpoint"), - region=os.getenv("AWS_REGION", "us-east-2"), + endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"), + region=os.getenv("AWS_REGION"), voice="aura-2-helena-en", ) From bcf11ecbd4abda082fc59a9c6d40fabf74403085 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 16:01:30 -0500 Subject: [PATCH 071/189] Looks like the Deepgram Sagemaker TTS services aren't able yet to successfully disconnect/reconnect to apply runtime settings updates. For now, marking them as not yet supporting runtime settings updates. --- src/pipecat/services/deepgram/stt_sagemaker.py | 9 +++++++-- src/pipecat/services/deepgram/tts_sagemaker.py | 12 +++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 30a53a4d1..64bb2ba8f 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -204,8 +204,13 @@ class DeepgramSageMakerSTTService(STTService): elif "live_options" in changed and self._settings.live_options.language is not None: self._settings.language = self._settings.live_options.language - await self._disconnect() - await self._connect() + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + return changed async def start(self, frame: StartFrame): diff --git a/src/pipecat/services/deepgram/tts_sagemaker.py b/src/pipecat/services/deepgram/tts_sagemaker.py index 24f9eaec9..8447c96f0 100644 --- a/src/pipecat/services/deepgram/tts_sagemaker.py +++ b/src/pipecat/services/deepgram/tts_sagemaker.py @@ -228,14 +228,20 @@ class DeepgramSageMakerTTSService(TTSService): """ changed = await super()._update_settings(update) + if not changed: + return changed + # Deepgram uses voice as the model, so keep them in sync for metrics if "voice" in changed: self._settings.model = self._settings.voice self._sync_model_name_to_metrics() - if changed: - await self._disconnect() - await self._connect() + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) return changed From 7556427862c3dae510e5876b422990b121e0ac2a Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 16:52:11 -0500 Subject: [PATCH 072/189] Revise changelog entries for service settings refactor Split the single "changed" entry into separate "added", "changed", and "deprecated" entries for clarity. Add a note about the subtle behavior change in the deprecated set_model/set_voice/set_language methods. 
--- changelog/3714.added.md | 19 +++++++++++++++++++ changelog/3714.changed.md | 2 +- changelog/3714.deprecated.2.md | 1 + changelog/3714.deprecated.md | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 changelog/3714.added.md create mode 100644 changelog/3714.deprecated.2.md diff --git a/changelog/3714.added.md b/changelog/3714.added.md new file mode 100644 index 000000000..83084675a --- /dev/null +++ b/changelog/3714.added.md @@ -0,0 +1,19 @@ +- Added support for using strongly-typed objects instead of dicts for updating service settings at runtime. + + Instead of, say: + + ```python + await task.queue_frame( + STTUpdateSettingsFrame(settings={"language": Language.ES}) + ) + ``` + + you'd do: + + ```python + await task.queue_frame( + STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) + ) + ``` + + Each service now vends strongly-typed classes like `DeepgramSTTSettings` representing the service's runtime-updatable settings. diff --git a/changelog/3714.changed.md b/changelog/3714.changed.md index a3081a7c8..bcfb5cbf7 100644 --- a/changelog/3714.changed.md +++ b/changelog/3714.changed.md @@ -1 +1 @@ -- ⚠️ Refactored service settings to use strongly-typed dataclasses (`TTSSettings`, `STTSettings`, `LLMSettings`, and service-specific subclasses) instead of plain dicts. Each service now exposes a `_settings` attribute with discoverable, typed fields. Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects. For service maintainers, see changes in COMMUNITY_INTEGRATIONS.md. +- ⚠️ Refactored runtime-updatable service settings to use strongly-typed classes (`TTSSettings`, `STTSettings`, `LLMSettings`, and service-specific subclasses) instead of plain dicts. Each service's `_settings` now holds these strongly-typed objects. For service maintainers, see changes in COMMUNITY_INTEGRATIONS.md. 
diff --git a/changelog/3714.deprecated.2.md b/changelog/3714.deprecated.2.md new file mode 100644 index 000000000..232c1dee5 --- /dev/null +++ b/changelog/3714.deprecated.2.md @@ -0,0 +1 @@ +- Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects with `*UpdateSettingsFrame(update={...})`. diff --git a/changelog/3714.deprecated.md b/changelog/3714.deprecated.md index ee71b2070..75337a642 100644 --- a/changelog/3714.deprecated.md +++ b/changelog/3714.deprecated.md @@ -1 +1,3 @@ - Deprecated `set_model()`, `set_voice()`, and `set_language()` on AI services in favor of runtime updates via `TTSUpdateSettingsFrame`, `STTUpdateSettingsFrame`, and `LLMUpdateSettingsFrame`. + + ⚠️ Note, too, a subtle behavior change in these deprecated methods. Whereas previously only `set_language()` caused the service to actually react to the update (e.g. by reconnecting to a remote service so it can pick up the change), now all these methods do. This change was made as part of a refactor making them all work the same way under the hood. From 71fc078c246533b44b0dc15c0da780f01e38bb47 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Mon, 23 Feb 2026 16:55:11 -0500 Subject: [PATCH 073/189] Refine ServiceSettings docstring: clarify NOT_GIVEN semantics and fix frame reference Use wildcard `*UpdateSettingsFrame` to cover all frame types. Clarify that NOT_GIVEN only appears in update deltas, not in the service's current settings state. --- src/pipecat/services/settings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 1033305c1..4664ecd39 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -98,11 +98,13 @@ class ServiceSettings: be changed **while the pipeline is running** (e.g. switching the model or changing the voice). 
They are *not* meant to capture every constructor parameter — only those that support live updates via - ``UpdateSettingsFrame``. + ``*UpdateSettingsFrame``. Every AI service type (LLM, TTS, STT) extends this with its own fields. Fields default to ``NOT_GIVEN`` so that an instance can represent either - the full current state **or** a sparse update delta. + the full current state **or** a sparse update delta. Note that in the full + current state, **all fields will be given** (i.e. ``NOT_GIVEN`` is reserved + for update deltas). Parameters: model: The model identifier used by the service. From 65f563ad34f689ce14a2f6ecd73ecdaba26d3ec3 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 23 Feb 2026 21:27:39 -0500 Subject: [PATCH 074/189] Add debug logging to KrispVivaTurn analyze_end_of_turn and update example Move speech detection tracking outside the per-frame loop in append_audio since is_speech applies to the whole buffer. Add debug log in analyze_end_of_turn to show state and probability at decision time. Update the Krisp VIVA example to use Cartesia TTS and turn analyzer strategy. --- changelog/3809.changed.md | 1 + examples/foundational/07p-interruptible-krisp-viva.py | 8 ++++++-- scripts/evals/run-release-evals.py | 3 +-- src/pipecat/audio/turn/krisp_viva_turn.py | 3 +++ 4 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog/3809.changed.md diff --git a/changelog/3809.changed.md b/changelog/3809.changed.md new file mode 100644 index 000000000..43aca00f3 --- /dev/null +++ b/changelog/3809.changed.md @@ -0,0 +1 @@ +- Added debug logging to `KrispVivaTurn.analyze_end_of_turn()` to log turn state and probability at decision time. 
diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index 259f02aa5..4da42e201 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ b/examples/foundational/07p-interruptible-krisp-viva.py @@ -41,12 +41,14 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies load_dotenv(override=True) @@ -76,7 +78,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en") + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121" + ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index 19e5d2649..77fc23a33 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -123,6 +123,7 @@ TESTS_07 = [ ("07n-interruptible-google.py", EVAL_SIMPLE_MATH), ("07n-interruptible-google-http.py", EVAL_SIMPLE_MATH), ("07o-interruptible-assemblyai.py", EVAL_SIMPLE_MATH), + ("07p-interruptible-krisp-viva.py", EVAL_SIMPLE_MATH), ("07q-interruptible-rime.py", 
EVAL_SIMPLE_MATH), ("07q-interruptible-rime-http.py", EVAL_SIMPLE_MATH), ("07r-interruptible-nvidia.py", EVAL_SIMPLE_MATH), @@ -148,8 +149,6 @@ TESTS_07 = [ ("07zj-interruptible-kokoro.py", EVAL_SIMPLE_MATH), # Needs a local XTTS docker instance running. # ("07i-interruptible-xtts.py", EVAL_SIMPLE_MATH), - # Needs a Krisp license. - # ("07p-interruptible-krisp.py", EVAL_SIMPLE_MATH), ] TESTS_12 = [ diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py index 04e59421f..59f8aada8 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -331,6 +331,9 @@ class KrispVivaTurn(BaseTurnAnalyzer): """ # For real-time processing, the state is determined in append_audio # Return the last state that was computed + logger.debug( + f"Krisp turn analysis: state={self._last_state}, probability={self._last_probability}" + ) return self._last_state, None def clear(self): From 0f7e6e14ab34ba22d66e0678e47aa854c977f24f Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 08:54:03 -0500 Subject: [PATCH 075/189] Bump nltk minimum version from 3.9.1 to 3.9.3 Resolves a security vulnerability flagged by Dependabot (#164). --- changelog/3811.changed.md | 1 + pyproject.toml | 2 +- uv.lock | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog/3811.changed.md diff --git a/changelog/3811.changed.md b/changelog/3811.changed.md new file mode 100644 index 000000000..eb3eb492e --- /dev/null +++ b/changelog/3811.changed.md @@ -0,0 +1 @@ +- Bumped `nltk` minimum version from 3.9.1 to 3.9.3 to resolve a security vulnerability. 
diff --git a/pyproject.toml b/pyproject.toml index 63ff02c06..a45ebb3b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "docstring_parser~=0.16", "loguru~=0.7.3", "Markdown>=3.7,<4", - "nltk>=3.9.1,<4", + "nltk>=3.9.3,<4", "numpy>=1.26.4,<3", "Pillow>=11.1.0,<13", "protobuf~=5.29.6", diff --git a/uv.lock b/uv.lock index c7adf5406..bd2f64639 100644 --- a/uv.lock +++ b/uv.lock @@ -3686,7 +3686,7 @@ wheels = [ [[package]] name = "nltk" -version = "3.9.2" +version = "3.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -3694,9 +3694,9 @@ dependencies = [ { name = "regex" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/8f/915e1c12df07c70ed779d18ab83d065718a926e70d3ea33eb0cd66ffb7c0/nltk-3.9.3.tar.gz", hash = "sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f", size = 2923673, upload-time = "2026-02-24T12:05:53.833Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, + { url = "https://files.pythonhosted.org/packages/c2/7e/9af5a710a1236e4772de8dfcc6af942a561327bb9f42b5b4a24d0cf100fd/nltk-3.9.3-py3-none-any.whl", hash = "sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522", size = 1525385, upload-time = "2026-02-24T12:05:46.54Z" }, ] [[package]] @@ -4692,7 +4692,7 @@ requires-dist = [ { name = "mcp", extras = ["cli"], marker = "extra == 'mcp'", specifier = ">=1.11.0,<2" }, { name = 
"mem0ai", marker = "extra == 'mem0'", specifier = "~=0.1.94" }, { name = "mlx-whisper", marker = "extra == 'mlx-whisper'", specifier = "~=0.4.2" }, - { name = "nltk", specifier = ">=3.9.1,<4" }, + { name = "nltk", specifier = ">=3.9.3,<4" }, { name = "noisereduce", marker = "extra == 'noisereduce'", specifier = "~=3.0.3" }, { name = "numba", specifier = "==0.61.2" }, { name = "numpy", specifier = ">=1.26.4,<3" }, From aff8ab8a40e0d268af8672bab76446317efa1405 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 23 Feb 2026 19:42:14 -0500 Subject: [PATCH 076/189] Update OpenAI Realtime default model to gpt-realtime-1.5 --- changelog/3807.changed.md | 1 + src/pipecat/services/openai/realtime/llm.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog/3807.changed.md diff --git a/changelog/3807.changed.md b/changelog/3807.changed.md new file mode 100644 index 000000000..cc99f29fb --- /dev/null +++ b/changelog/3807.changed.md @@ -0,0 +1 @@ +- Updated `OpenAIRealtimeLLMService` default model to `gpt-realtime-1.5`. \ No newline at end of file diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index d765fea75..750f6ded0 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -121,7 +121,7 @@ class OpenAIRealtimeLLMService(LLMService): self, *, api_key: str, - model: str = "gpt-realtime", + model: str = "gpt-realtime-1.5", base_url: str = "wss://api.openai.com/v1/realtime", session_properties: Optional[events.SessionProperties] = None, start_audio_paused: bool = False, From 6f7664846cace2b4109cd9a207c69deabc700ebc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 12:53:55 -0500 Subject: [PATCH 077/189] Add can_generate_metrics to Soniox and AWS Transcribe STT services Commit 859cd7c9 refactored STT TTFB measurement to use the base class start_ttfb_metrics/stop_ttfb_metrics, which are gated behind can_generate_metrics(). 
Soniox and AWS Transcribe never overrode this method (default returns False), so TTFB was silently never reported. --- changelog/3813.fixed.md | 1 + src/pipecat/services/aws/stt.py | 8 ++++++++ src/pipecat/services/soniox/stt.py | 8 ++++++++ 3 files changed, 17 insertions(+) create mode 100644 changelog/3813.fixed.md diff --git a/changelog/3813.fixed.md b/changelog/3813.fixed.md new file mode 100644 index 000000000..9d9115e77 --- /dev/null +++ b/changelog/3813.fixed.md @@ -0,0 +1 @@ +- Fixed STT TTFB metrics not being reported for `SonioxSTTService` and `AWSTranscribeSTTService` due to missing `can_generate_metrics()` override. diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index 09552ecfc..c53e3648c 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -126,6 +126,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): self._receive_task = None + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics. + + Returns: + True, as AWS Transcribe STT supports metrics generation. + """ + return True + def get_service_encoding(self, encoding: str) -> str: """Convert internal encoding format to AWS Transcribe format. diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 1e4b49705..61dbb794f 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -208,6 +208,14 @@ class SonioxSTTService(WebsocketSTTService): self._receive_task = None + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics. + + Returns: + True, as Soniox STT supports metrics generation. + """ + return True + async def start(self, frame: StartFrame): """Start the Soniox STT websocket connection. 
From 23ad1815156614667e23c7bfd37f540e86619cbc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 13:09:29 -0500 Subject: [PATCH 078/189] Fix Soniox processing metrics to measure token-to-transcript time Move start_processing_metrics from run_stt (called per audio chunk, producing noisy 0ms logs) to _receive_messages when the first final token arrives for a new utterance. The existing stop_processing_metrics in send_endpoint_transcript completes the pair, giving a meaningful measurement of time from first recognition to finalized transcript. --- src/pipecat/services/soniox/stt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 61dbb794f..630e11862 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -301,10 +301,8 @@ class SonioxSTTService(WebsocketSTTService): Yields: Frame: None (transcription results come via WebSocket callbacks). """ - await self.start_processing_metrics() if self._websocket and self._websocket.state is State.OPEN: await self._websocket.send(audio) - await self.stop_processing_metrics() yield None @@ -485,6 +483,8 @@ class SonioxSTTService(WebsocketSTTService): # the rest will be sent as interim tokens (even final tokens). await send_endpoint_transcript() else: + if not self._final_transcription_buffer: + await self.start_processing_metrics() self._final_transcription_buffer.append(token) else: non_final_transcription.append(token) From 323477bfa42a6388c4a0aeef0ca76ee6e559ca77 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Tue, 24 Feb 2026 15:48:46 -0300 Subject: [PATCH 079/189] Refactoring the services using the WordTTSService. 
--- src/pipecat/services/azure/tts.py | 8 +- src/pipecat/services/cartesia/tts.py | 5 +- src/pipecat/services/elevenlabs/tts.py | 10 +- src/pipecat/services/gradium/tts.py | 5 +- src/pipecat/services/hume/tts.py | 6 +- src/pipecat/services/inworld/tts.py | 8 +- src/pipecat/services/resembleai/tts.py | 5 +- src/pipecat/services/rime/tts.py | 5 +- src/pipecat/services/tts_service.py | 188 ++++++++++--------------- 9 files changed, 102 insertions(+), 138 deletions(-) diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index f1bf9d400..7672a846c 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -27,7 +27,7 @@ from pipecat.frames.frames import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.azure.common import language_to_azure_language from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import TTSService, WordTTSService +from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -258,7 +258,7 @@ class AzureBaseTTSService: return escaped_text -class AzureTTSService(WordTTSService, AzureBaseTTSService): +class AzureTTSService(TTSService, AzureBaseTTSService): """Azure Cognitive Services streaming TTS service with word timestamps. Provides real-time text-to-speech synthesis using Azure's WebSocket-based @@ -286,14 +286,14 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): sample_rate: Audio sample rate in Hz. If None, uses service default. params: Voice and synthesis parameters configuration. aggregate_sentences: Whether to aggregate sentences before synthesis. - **kwargs: Additional arguments passed to parent WordTTSService. + **kwargs: Additional arguments passed to the parent TTSService. 
""" - # Initialize WordTTSService first to set up word timestamp tracking super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, # We'll push text frames based on word timestamps push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, **kwargs, ) diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index cca540c72..3f6fe2c21 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -29,7 +29,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given -from pipecat.services.tts_service import AudioContextWordTTSService, TTSService +from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator @@ -229,7 +229,7 @@ class CartesiaTTSSettings(TTSSettings): return super().from_mapping(flat) -class CartesiaTTSService(AudioContextWordTTSService): +class CartesiaTTSService(AudioContextTTSService): """Cartesia TTS service with WebSocket streaming and word timestamps. Provides text-to-speech using Cartesia's streaming WebSocket API. 
@@ -311,6 +311,7 @@ class CartesiaTTSService(AudioContextWordTTSService): aggregate_sentences=aggregate_sentences, push_text_frames=False, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, text_aggregator=text_aggregator, **kwargs, diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 72fa6c11a..8d51e9dde 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -46,8 +46,8 @@ from pipecat.frames.frames import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import ( - AudioContextWordTTSService, - WordTTSService, + AudioContextTTSService, + TTSService, ) from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -317,7 +317,7 @@ def calculate_word_times( return (word_times, new_partial_word, new_partial_word_start_time) -class ElevenLabsTTSService(AudioContextWordTTSService): +class ElevenLabsTTSService(AudioContextTTSService): """ElevenLabs WebSocket-based TTS service with word timestamps. Provides real-time text-to-speech using ElevenLabs' WebSocket streaming API. @@ -399,6 +399,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, **kwargs, ) @@ -838,7 +839,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): yield ErrorFrame(error=f"Unknown error occurred: {e}") -class ElevenLabsHttpTTSService(WordTTSService): +class ElevenLabsHttpTTSService(TTSService): """ElevenLabs HTTP-based TTS service with word timestamps. 
Provides text-to-speech using ElevenLabs' HTTP streaming API for simpler, @@ -903,6 +904,7 @@ class ElevenLabsHttpTTSService(WordTTSService): aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, sample_rate=sample_rate, **kwargs, ) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 8b8995c41..d10f6258d 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import AudioContextWordTTSService +from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts try: @@ -51,7 +51,7 @@ class GradiumTTSSettings(TTSSettings): output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) -class GradiumTTSService(AudioContextWordTTSService): +class GradiumTTSService(AudioContextTTSService): """Text-to-Speech service using Gradium's websocket API.""" _settings: GradiumTTSSettings @@ -91,6 +91,7 @@ class GradiumTTSService(AudioContextWordTTSService): push_stop_frames=True, push_text_frames=False, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=SAMPLE_RATE, **kwargs, ) diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index d15f13ce1..b5a064334 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -27,7 +27,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import WordTTSService +from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts 
try: @@ -64,7 +64,7 @@ class HumeTTSSettings(TTSSettings): trailing_silence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) -class HumeTTSService(WordTTSService): +class HumeTTSService(TTSService): """Hume Octave Text-to-Speech service. Streams PCM audio via Hume's HTTP output streaming (JSON chunks) endpoint @@ -121,11 +121,11 @@ class HumeTTSService(WordTTSService): f"Hume TTS streams at {HUME_SAMPLE_RATE} Hz; configured sample_rate={sample_rate}" ) - # WordTTSService sets push_text_frames=False by default, which we want super().__init__( sample_rate=sample_rate, push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, **kwargs, ) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 11e6125c0..8e457aabc 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -51,7 +51,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextWordTTSService, WordTTSService +from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -102,7 +102,7 @@ class InworldTTSSettings(TTSSettings): return super().from_mapping(flat) -class InworldHttpTTSService(WordTTSService): +class InworldHttpTTSService(TTSService): """Inworld AI HTTP-based TTS service. Supports both streaming and non-streaming modes via the `streaming` parameter. @@ -153,6 +153,7 @@ class InworldHttpTTSService(WordTTSService): super().__init__( push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, sample_rate=sample_rate, **kwargs, ) @@ -467,7 +468,7 @@ class InworldHttpTTSService(WordTTSService): ) -class InworldTTSService(AudioContextWordTTSService): +class InworldTTSService(AudioContextTTSService): """Inworld AI WebSocket-based TTS service. 
Uses bidirectional WebSocket for lower latency streaming. Supports multiple @@ -534,6 +535,7 @@ class InworldTTSService(AudioContextWordTTSService): push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, append_trailing_space=append_trailing_space, diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index b8bb4a1da..177a4c10e 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import AudioContextWordTTSService +from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts try: @@ -58,7 +58,7 @@ class ResembleAITTSSettings(TTSSettings): } -class ResembleAITTSService(AudioContextWordTTSService): +class ResembleAITTSService(AudioContextTTSService): """Resemble AI TTS service with WebSocket streaming and word timestamps. Provides text-to-speech using Resemble AI's streaming WebSocket API. 
@@ -93,6 +93,7 @@ class ResembleAITTSService(AudioContextWordTTSService): super().__init__( sample_rate=sample_rate, reuse_context_id_within_turn=False, + supports_word_timestamps=True, **kwargs, ) diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 6e03c2461..6e795cc3d 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import ( - AudioContextWordTTSService, + AudioContextTTSService, InterruptibleTTSService, TTSService, ) @@ -130,7 +130,7 @@ class RimeNonJsonTTSSettings(TTSSettings): _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} -class RimeTTSService(AudioContextWordTTSService): +class RimeTTSService(AudioContextTTSService): """Text-to-Speech service using Rime's websocket API. Uses Rime's websocket JSON API to convert text to speech with word-level timing @@ -207,6 +207,7 @@ class RimeTTSService(AudioContextWordTTSService): push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, append_trailing_space=True, sample_rate=sample_rate, **kwargs, diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 9d9c41b60..e7b57833d 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -128,6 +128,8 @@ class TTSService(AIService): append_trailing_space: bool = False, # TTS output sample rate sample_rate: Optional[int] = None, + # if True, enables word-level timestamp tracking and synchronization + supports_word_timestamps: bool = False, # Text aggregator to aggregate incoming tokens and decide when to push to the TTS. text_aggregator: Optional[BaseTextAggregator] = None, # Types of text aggregations that should not be spoken. 
@@ -160,6 +162,9 @@ class TTSService(AIService): append_trailing_space: Whether to append a trailing space to text before sending to TTS. This helps prevent some TTS services from vocalizing trailing punctuation (e.g., "dot"). sample_rate: Output sample rate for generated audio. + supports_word_timestamps: Whether this service supports word-level timestamp tracking. + When True, enables synchronization of audio with spoken words so only spoken words + are added to the conversation context. text_aggregator: Custom text aggregator for processing incoming text. .. deprecated:: 0.0.95 @@ -231,6 +236,13 @@ class TTSService(AIService): self._processing_text: bool = False self._tts_contexts: Dict[str, TTSContext] = {} + # Word timestamp state (active when supports_word_timestamps=True) + self._supports_word_timestamps: bool = supports_word_timestamps + self._initial_word_timestamp: int = -1 + self._initial_word_times: List[Tuple[str, float, Optional[str]]] = [] + self._words_task: Optional[asyncio.Task] = None + self._llm_response_started: bool = False + self._register_event_handler("on_connected") self._register_event_handler("on_disconnected") self._register_event_handler("on_connection_error") @@ -366,6 +378,8 @@ class TTSService(AIService): self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate if self._push_stop_frames and not self._stop_frame_task: self._stop_frame_task = self.create_task(self._stop_frame_handler()) + if self._supports_word_timestamps: + self._create_words_task() async def stop(self, frame: EndFrame): """Stop the TTS service. @@ -377,6 +391,8 @@ class TTSService(AIService): if self._stop_frame_task: await self.cancel_task(self._stop_frame_task) self._stop_frame_task = None + if self._words_task: + await self._stop_words_task() async def cancel(self, frame: CancelFrame): """Cancel the TTS service. 
@@ -388,6 +404,8 @@ class TTSService(AIService): if self._stop_frame_task: await self.cancel_task(self._stop_frame_task) self._stop_frame_task = None + if self._words_task: + await self._stop_words_task() def add_text_transformer( self, @@ -492,6 +510,9 @@ class TTSService(AIService): elif isinstance(frame, InterruptionFrame): await self._handle_interruption(frame, direction) await self.push_frame(frame, direction) + elif isinstance(frame, LLMFullResponseStartFrame): + self._llm_response_started = True + await self.push_frame(frame, direction) elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): # We pause processing incoming frames if the LLM response included # text (it might be that it's only a function calling response). We @@ -510,6 +531,9 @@ class TTSService(AIService): await self.push_frame(frame, direction) else: await self.push_frame(frame, direction) + # Flush any pending audio so the TTS service closes the current context. + if self._supports_word_timestamps: + await self.flush_audio() elif isinstance(frame, TTSSpeakFrame): # Store if we were processing text or not so we can set it back. processing_text = self._processing_text @@ -648,6 +672,10 @@ class TTSService(AIService): for filter in self._text_filters: await filter.handle_interruption() + self._llm_response_started = False + if self._supports_word_timestamps: + await self.reset_word_timestamps() + async def _maybe_pause_frame_processing(self): if self._processing_text and self._pause_frame_processing: await self.pause_processing_frames() @@ -786,25 +814,9 @@ class TTSService(AIService): await self.push_frame(TTSStoppedFrame()) has_started = False - -class WordTTSService(TTSService): - """Base class for TTS services that support word timestamps. - - Word timestamps are useful to synchronize audio with text of the spoken - words. This way only the spoken words are added to the conversation context. - """ - - def __init__(self, **kwargs): - """Initialize the Word TTS service. 
- - Args: - **kwargs: Additional arguments passed to the parent TTSService. - """ - super().__init__(**kwargs) - self._initial_word_timestamp = -1 - self._initial_word_times = [] - self._words_task = None - self._llm_response_started: bool = False + # + # Word timestamp methods (active when supports_word_timestamps=True) + # async def start_word_timestamps(self): """Start tracking word timestamps from the current time.""" @@ -839,55 +851,9 @@ class WordTTSService(TTSService): else: await self._add_word_timestamps(word_times_with_context) - async def start(self, frame: StartFrame): - """Start the word TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - self._create_words_task() - - async def stop(self, frame: EndFrame): - """Stop the word TTS service. - - Args: - frame: The end frame. - """ - await super().stop(frame) - await self._stop_words_task() - - async def cancel(self, frame: CancelFrame): - """Cancel the word TTS service. - - Args: - frame: The cancel frame. - """ - await super().cancel(frame) - await self._stop_words_task() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames with word timestamp awareness. - - Args: - frame: The frame to process. - direction: The direction of frame processing. 
- """ - await super().process_frame(frame, direction) - - if isinstance(frame, LLMFullResponseStartFrame): - self._llm_response_started = True - elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): - await self.flush_audio() - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - await super()._handle_interruption(frame, direction) - self._llm_response_started = False - await self.reset_word_timestamps() - def _create_words_task(self): if not self._words_task: - self._words_queue = asyncio.Queue() + self._words_queue: asyncio.Queue = asyncio.Queue() self._words_task = self.create_task(self._words_task_handler()) async def _stop_words_task(self): @@ -929,6 +895,23 @@ class WordTTSService(TTSService): self._words_queue.task_done() +class WordTTSService(TTSService): + """Deprecated. Use TTSService with supports_word_timestamps=True instead. + + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to TTSService (or any subclass) instead. + """ + + def __init__(self, **kwargs): + """Initialize the Word TTS service. + + Args: + **kwargs: Additional arguments passed to the parent TTSService. + """ + super().__init__(supports_word_timestamps=True, **kwargs) + + class WebsocketTTSService(TTSService, WebsocketService): """Base class for websocket-based TTS services. @@ -1001,10 +984,12 @@ class InterruptibleTTSService(WebsocketTTSService): self._bot_speaking = False -class WebsocketWordTTSService(WordTTSService, WebsocketService): - """Base class for websocket-based TTS services that support word timestamps. +class WebsocketWordTTSService(WebsocketTTSService): + """Deprecated. Use WebsocketTTSService with supports_word_timestamps=True instead. - Combines word timestamp functionality with websocket connectivity. + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. 
Pass + ``supports_word_timestamps=True`` to WebsocketTTSService instead. """ def __init__(self, *, reconnect_on_error: bool = True, **kwargs): @@ -1014,53 +999,26 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService): reconnect_on_error: Whether to automatically reconnect on websocket errors. **kwargs: Additional arguments passed to parent classes. """ - WordTTSService.__init__(self, **kwargs) - WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) - - async def _report_error(self, error: ErrorFrame): - await self._call_event_handler("on_connection_error", error.error) - await self.push_error_frame(error) + super().__init__( + supports_word_timestamps=True, reconnect_on_error=reconnect_on_error, **kwargs + ) -class InterruptibleWordTTSService(WebsocketWordTTSService): - """Websocket-based TTS service with word timestamps that handles interruptions. +class InterruptibleWordTTSService(InterruptibleTTSService): + """Deprecated. Use InterruptibleTTSService with supports_word_timestamps=True instead. - For TTS services that support word timestamps but can't correlate generated - audio with requested text. Handles interruptions by reconnecting when needed. + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to InterruptibleTTSService instead. """ def __init__(self, **kwargs): """Initialize the Interruptible Word TTS service. Args: - **kwargs: Additional arguments passed to the parent WebsocketWordTTSService. + **kwargs: Additional arguments passed to the parent InterruptibleTTSService. """ - super().__init__(**kwargs) - - # Indicates if the bot is speaking. If the bot is not speaking we don't - # need to reconnect when the user speaks. If the bot is speaking and the - # user interrupts we need to reconnect. 
- self._bot_speaking = False - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - await super()._handle_interruption(frame, direction) - if self._bot_speaking: - await self._disconnect() - await self._connect() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames with bot speaking state tracking. - - Args: - frame: The frame to process. - direction: The direction of frame processing. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, BotStartedSpeakingFrame): - self._bot_speaking = True - elif isinstance(frame, BotStoppedSpeakingFrame): - self._bot_speaking = False + super().__init__(supports_word_timestamps=True, **kwargs) class AudioContextTTSService(WebsocketTTSService): @@ -1299,15 +1257,12 @@ class AudioContextTTSService(WebsocketTTSService): break -class AudioContextWordTTSService(AudioContextTTSService, WebsocketWordTTSService): - """Websocket-based TTS service with word timestamps and audio context management. +class AudioContextWordTTSService(AudioContextTTSService): + """Deprecated. Use AudioContextTTSService with supports_word_timestamps=True instead. - This is a base class for websocket-based TTS services that support word - timestamps and also allow correlating the generated audio with the requested - text through audio contexts. - - Combines the audio context management capabilities of AudioContextTTSService - with the word timestamp functionality of WebsocketWordTTSService. + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to AudioContextTTSService instead. """ def __init__(self, *, reconnect_on_error: bool = True, **kwargs): @@ -1317,5 +1272,6 @@ class AudioContextWordTTSService(AudioContextTTSService, WebsocketWordTTSService reconnect_on_error: Whether to automatically reconnect on websocket errors. **kwargs: Additional arguments passed to parent classes. 
""" - AudioContextTTSService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) - WebsocketWordTTSService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) + super().__init__( + supports_word_timestamps=True, reconnect_on_error=reconnect_on_error, **kwargs + ) From 6cda2ff941d094af0726f970cdccd30924387ecf Mon Sep 17 00:00:00 2001 From: filipi87 Date: Tue, 24 Feb 2026 15:49:02 -0300 Subject: [PATCH 080/189] Changelog entry for word timestamp refactor and deprecation notes. --- changelog/3786.changed.md | 1 + changelog/3786.deprecated.md | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog/3786.changed.md create mode 100644 changelog/3786.deprecated.md diff --git a/changelog/3786.changed.md b/changelog/3786.changed.md new file mode 100644 index 000000000..ed8e7e444 --- /dev/null +++ b/changelog/3786.changed.md @@ -0,0 +1 @@ +- Word timestamp support has been moved from `WordTTSService` into `TTSService` via a new `supports_word_timestamps` parameter. Services that previously extended `WordTTSService`, `AudioContextWordTTSService`, or `WebsocketWordTTSService` now pass `supports_word_timestamps=True` to their parent `__init__` instead. diff --git a/changelog/3786.deprecated.md b/changelog/3786.deprecated.md new file mode 100644 index 000000000..7ac5a5b9c --- /dev/null +++ b/changelog/3786.deprecated.md @@ -0,0 +1,5 @@ +- Deprecated `WordTTSService`, `WebsocketWordTTSService`, `AudioContextWordTTSService`, and `InterruptibleWordTTSService`. 
Use their non-word counterparts with `supports_word_timestamps=True` instead: + - `WordTTSService` → `TTSService(supports_word_timestamps=True)` + - `WebsocketWordTTSService` → `WebsocketTTSService(supports_word_timestamps=True)` + - `AudioContextWordTTSService` → `AudioContextTTSService(supports_word_timestamps=True)` + - `InterruptibleWordTTSService` → `InterruptibleTTSService(supports_word_timestamps=True)` From bcc2b4def48ef5c0c13544ab526df79fe80531d1 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 10:52:05 -0500 Subject: [PATCH 081/189] Make clearer the distinction between "storage-mode" and "delta-mode" usage of `*Settings` objects - Storage mode: for use in `self._settings`. All fields should be specified, i.e. should not be `NOT_GIVEN`. - Delta mode: for use in `*UpdateSettingsFrame`. In service of this, this commit: - Adds a runtime check that all fields are specified in storage mode - Updates all services to specify all fields in stored settings - Updates all services to no longer check for `is_given` in stored settings (not necessary anymore) - Updates relevant docstrings - Renames `update` to `delta` in `*UpdateSettingsFrame` - Updates community integrations guide --- COMMUNITY_INTEGRATIONS.md | 8 +- changelog/3714.added.md | 2 +- changelog/3714.deprecated.2.md | 2 +- .../55a-update-settings-deepgram-flux-stt.py | 2 +- ...-update-settings-deepgram-sagemaker-stt.py | 2 +- .../55a-update-settings-deepgram-stt.py | 2 +- .../55b-update-settings-azure-stt.py | 4 +- .../55c-update-settings-google-stt.py | 2 +- .../55d-update-settings-assemblyai-stt.py | 2 +- .../55e-update-settings-gladia-stt.py | 2 +- ...update-settings-elevenlabs-realtime-stt.py | 2 +- .../55g-update-settings-elevenlabs-stt.py | 2 +- .../55h-update-settings-speechmatics-stt.py | 6 +- .../55i-update-settings-whisper-api-stt.py | 2 +- .../55j-update-settings-sarvam-stt.py | 2 +- .../55k-update-settings-soniox-stt.py | 2 +- .../55l-update-settings-aws-transcribe-stt.py | 2 
+- .../55m-update-settings-cartesia-stt.py | 2 +- .../55n-update-settings-cartesia-http-tts.py | 2 +- .../55n-update-settings-cartesia-tts.py | 2 +- ...55o-update-settings-elevenlabs-http-tts.py | 2 +- .../55o-update-settings-elevenlabs-tts.py | 4 +- .../55p-update-settings-openai-tts.py | 2 +- .../55q-update-settings-deepgram-http-tts.py | 4 +- ...-update-settings-deepgram-sagemaker-tts.py | 4 +- .../55q-update-settings-deepgram-tts.py | 4 +- .../55r-update-settings-azure-http-tts.py | 2 +- .../55r-update-settings-azure-tts.py | 2 +- .../55s-update-settings-google-http-tts.py | 2 +- .../55s-update-settings-google-stream-tts.py | 2 +- .../55t-update-settings-playht-tts.py | 2 +- .../55u-update-settings-rime-http-tts.py | 2 +- .../55u-update-settings-rime-tts.py | 2 +- .../55v-update-settings-lmnt-tts.py | 2 +- .../55w-update-settings-fish-tts.py | 2 +- .../55x-update-settings-minimax-tts.py | 2 +- .../55y-update-settings-groq-tts.py | 2 +- .../55z-update-settings-hume-tts.py | 2 +- ...55za-update-settings-neuphonic-http-tts.py | 2 +- .../55za-update-settings-neuphonic-tts.py | 2 +- .../55zb-update-settings-inworld-http-tts.py | 4 +- .../55zb-update-settings-inworld-tts.py | 2 +- .../55zc-update-settings-gemini-tts.py | 2 +- .../55zd-update-settings-aws-polly-tts.py | 2 +- .../55ze-update-settings-sarvam-http-tts.py | 2 +- .../55ze-update-settings-sarvam-tts.py | 2 +- .../55zf-update-settings-camb-tts.py | 2 +- .../55zg-update-settings-hathora-tts.py | 2 +- .../55zh-update-settings-resembleai-tts.py | 2 +- .../55zi-update-settings-azure-llm.py | 2 +- .../55zi-update-settings-openai-llm.py | 2 +- .../55zj-update-settings-anthropic-llm.py | 2 +- .../55zk-update-settings-google-llm.py | 2 +- .../55zk-update-settings-google-vertex-llm.py | 2 +- .../55zl-update-settings-azure-realtime.py | 4 +- .../55zl-update-settings-openai-realtime.py | 4 +- ...55zm-update-settings-gemini-live-vertex.py | 4 +- .../55zm-update-settings-gemini-live.py | 4 +- 
.../55zn-update-settings-ultravox-realtime.py | 4 +- .../55zo-update-settings-grok-realtime.py | 2 +- .../55zp-update-settings-aws-bedrock-llm.py | 4 +- .../55zq-update-settings-fal-stt.py | 2 +- .../55zr-update-settings-gradium-stt.py | 2 +- .../55zs-update-settings-hathora-stt.py | 2 +- ...zt-update-settings-nvidia-segmented-stt.py | 2 +- .../55zt-update-settings-nvidia-stt.py | 2 +- ...5zu-update-settings-openai-realtime-stt.py | 2 +- .../55zv-update-settings-asyncai-http-tts.py | 2 +- .../55zv-update-settings-asyncai-tts.py | 2 +- .../55zw-update-settings-gradium-tts.py | 2 +- .../55zx-update-settings-cerebras-llm.py | 2 +- .../55zy-update-settings-deepseek-llm.py | 2 +- .../55zz-update-settings-fireworks-llm.py | 2 +- .../55zza-update-settings-grok-llm.py | 2 +- .../55zzb-update-settings-groq-llm.py | 2 +- .../55zzc-update-settings-mistral-llm.py | 2 +- .../55zzd-update-settings-nvidia-llm.py | 2 +- .../55zze-update-settings-ollama-llm.py | 2 +- .../55zzf-update-settings-openrouter-llm.py | 2 +- .../55zzg-update-settings-perplexity-llm.py | 2 +- .../55zzh-update-settings-qwen-llm.py | 2 +- .../55zzi-update-settings-sambanova-llm.py | 2 +- .../55zzj-update-settings-together-llm.py | 2 +- ...5zzk-update-settings-aws-nova-sonic-llm.py | 2 +- .../55zzl-update-settings-nvidia-tts.py | 2 +- .../55zzm-update-settings-speechmatics-tts.py | 2 +- src/pipecat/frames/frames.py | 12 +- src/pipecat/services/ai_service.py | 13 +- src/pipecat/services/anthropic/llm.py | 5 + src/pipecat/services/assemblyai/stt.py | 9 +- src/pipecat/services/asyncai/tts.py | 6 +- src/pipecat/services/aws/llm.py | 6 + src/pipecat/services/aws/nova_sonic/llm.py | 12 +- src/pipecat/services/aws/stt.py | 6 +- src/pipecat/services/aws/tts.py | 1 + src/pipecat/services/azure/stt.py | 7 +- src/pipecat/services/azure/tts.py | 1 + src/pipecat/services/cartesia/stt.py | 8 +- src/pipecat/services/cartesia/tts.py | 25 +- src/pipecat/services/deepgram/flux/stt.py | 6 +- src/pipecat/services/deepgram/stt.py 
| 16 +- .../services/deepgram/stt_sagemaker.py | 16 +- src/pipecat/services/deepgram/tts.py | 10 +- .../services/deepgram/tts_sagemaker.py | 7 +- src/pipecat/services/elevenlabs/stt.py | 16 +- src/pipecat/services/elevenlabs/tts.py | 32 +-- src/pipecat/services/fal/stt.py | 1 + src/pipecat/services/fish/tts.py | 8 +- src/pipecat/services/gladia/stt.py | 10 +- .../services/google/gemini_live/llm.py | 9 +- src/pipecat/services/google/llm.py | 5 + src/pipecat/services/google/stt.py | 48 ++-- src/pipecat/services/google/tts.py | 39 +-- src/pipecat/services/gradium/stt.py | 17 +- src/pipecat/services/gradium/tts.py | 9 +- src/pipecat/services/grok/realtime/llm.py | 20 +- src/pipecat/services/hume/tts.py | 5 +- src/pipecat/services/inworld/tts.py | 57 ++--- src/pipecat/services/kokoro/tts.py | 1 + src/pipecat/services/llm_service.py | 24 +- src/pipecat/services/lmnt/tts.py | 8 +- src/pipecat/services/minimax/tts.py | 16 +- src/pipecat/services/neuphonic/tts.py | 8 +- src/pipecat/services/nvidia/stt.py | 8 +- src/pipecat/services/nvidia/tts.py | 6 +- src/pipecat/services/openai/base_llm.py | 3 + src/pipecat/services/openai/realtime/llm.py | 19 +- src/pipecat/services/openai/stt.py | 10 +- .../services/openai_realtime_beta/openai.py | 19 +- src/pipecat/services/perplexity/llm.py | 12 +- src/pipecat/services/piper/tts.py | 10 +- src/pipecat/services/playht/tts.py | 10 +- src/pipecat/services/resembleai/tts.py | 1 + src/pipecat/services/rime/tts.py | 100 ++++---- src/pipecat/services/sarvam/stt.py | 34 +-- src/pipecat/services/sarvam/tts.py | 28 ++- src/pipecat/services/settings.py | 232 ++++++++++++------ src/pipecat/services/soniox/stt.py | 12 +- src/pipecat/services/speechmatics/stt.py | 31 +-- src/pipecat/services/speechmatics/tts.py | 2 + src/pipecat/services/stt_service.py | 24 +- src/pipecat/services/tts_service.py | 28 +-- src/pipecat/services/ultravox/llm.py | 18 +- src/pipecat/services/whisper/base_stt.py | 6 +- src/pipecat/services/xtts/tts.py | 1 + 145 files 
changed, 732 insertions(+), 587 deletions(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index 642754451..e11c79f31 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -260,9 +260,11 @@ class MySTTService(STTService): _settings: MySTTSettings - def __init__(self, *, model: str, region: str, **kwargs): + def __init__(self, *, model: str, language: str, region: str, **kwargs): super().__init__(**kwargs) - self._settings = MySTTSettings(model=model, region=region) + # Initial value must be provided for every field in self._settings + # before service is started + self._settings = MySTTSettings(model=model, language=language, region=region) self._sync_model_name_to_metrics() ``` @@ -298,7 +300,7 @@ async def _update_settings(self, update: STTSettings) -> dict[str, Any]: if "language" in changed: await self._update_language() else: - # TODO: handle changes to other settings soon! + # TODO: this should be temporary - handle changes to other settings soon! self._warn_unhandled_updated_settings(changed.keys() - {"language"}) return changed diff --git a/changelog/3714.added.md b/changelog/3714.added.md index 83084675a..efa54b7d5 100644 --- a/changelog/3714.added.md +++ b/changelog/3714.added.md @@ -12,7 +12,7 @@ ```python await task.queue_frame( - STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES)) ) ``` diff --git a/changelog/3714.deprecated.2.md b/changelog/3714.deprecated.2.md index 232c1dee5..d386fa5a4 100644 --- a/changelog/3714.deprecated.2.md +++ b/changelog/3714.deprecated.2.md @@ -1 +1 @@ -- Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects with `*UpdateSettingsFrame(update={...})`. +- Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects with `*UpdateSettingsFrame(delta={...})`. 
diff --git a/examples/foundational/55a-update-settings-deepgram-flux-stt.py b/examples/foundational/55a-update-settings-deepgram-flux-stt.py index d5fb66a2e..a482e513c 100644 --- a/examples/foundational/55a-update-settings-deepgram-flux-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-flux-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Deepgram Flux STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=DeepgramFluxSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=DeepgramFluxSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py index 8e45b5f2a..05c92e7e2 100644 --- a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -109,7 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Deepgram SageMaker STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=DeepgramSageMakerSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=DeepgramSageMakerSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py index aea9475a8..39dde69e9 100644 --- a/examples/foundational/55a-update-settings-deepgram-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Deepgram STT settings: language=es") await task.queue_frame( - 
STTUpdateSettingsFrame(update=DeepgramSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55b-update-settings-azure-stt.py b/examples/foundational/55b-update-settings-azure-stt.py index 7fd0d2ca4..96e4041d0 100644 --- a/examples/foundational/55b-update-settings-azure-stt.py +++ b/examples/foundational/55b-update-settings-azure-stt.py @@ -105,9 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Azure STT settings: language=es") - await task.queue_frame( - STTUpdateSettingsFrame(update=AzureSTTSettings(language=Language.ES)) - ) + await task.queue_frame(STTUpdateSettingsFrame(delta=AzureSTTSettings(language=Language.ES))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55c-update-settings-google-stt.py b/examples/foundational/55c-update-settings-google-stt.py index dd33bfe75..dede5b173 100644 --- a/examples/foundational/55c-update-settings-google-stt.py +++ b/examples/foundational/55c-update-settings-google-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Google STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=GoogleSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=GoogleSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55d-update-settings-assemblyai-stt.py b/examples/foundational/55d-update-settings-assemblyai-stt.py index 6d6a2532e..d37c3ec7b 100644 --- a/examples/foundational/55d-update-settings-assemblyai-stt.py +++ b/examples/foundational/55d-update-settings-assemblyai-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AssemblyAI STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=AssemblyAISTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=AssemblyAISTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55e-update-settings-gladia-stt.py b/examples/foundational/55e-update-settings-gladia-stt.py index a2c6f21fe..e5bd5486a 100644 --- a/examples/foundational/55e-update-settings-gladia-stt.py +++ b/examples/foundational/55e-update-settings-gladia-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Gladia STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=GladiaSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=GladiaSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py index 9aee04fbb..c3f0a6325 100644 --- a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py +++ b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py @@ -106,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating ElevenLabs Realtime STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=ElevenLabsRealtimeSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=ElevenLabsRealtimeSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55g-update-settings-elevenlabs-stt.py b/examples/foundational/55g-update-settings-elevenlabs-stt.py index 33844935a..9435bc1ac 100644 --- 
a/examples/foundational/55g-update-settings-elevenlabs-stt.py +++ b/examples/foundational/55g-update-settings-elevenlabs-stt.py @@ -108,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating ElevenLabs STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=ElevenLabsSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=ElevenLabsSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55h-update-settings-speechmatics-stt.py b/examples/foundational/55h-update-settings-speechmatics-stt.py index d041d69d2..c362d2f9f 100644 --- a/examples/foundational/55h-update-settings-speechmatics-stt.py +++ b/examples/foundational/55h-update-settings-speechmatics-stt.py @@ -110,13 +110,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Speechmatics STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(language=Language.ES)) ) await asyncio.sleep(10) logger.info("Updating Speechmatics STT settings: focus_speakers=['S1']") await task.queue_frame( - STTUpdateSettingsFrame(update=SpeechmaticsSTTSettings(focus_speakers=["S1"])) + STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(focus_speakers=["S1"])) ) await asyncio.sleep(10) @@ -125,7 +125,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ) await task.queue_frame( STTUpdateSettingsFrame( - update=SpeechmaticsSTTSettings( + delta=SpeechmaticsSTTSettings( speaker_active_format="{text}" ) ) diff --git a/examples/foundational/55i-update-settings-whisper-api-stt.py b/examples/foundational/55i-update-settings-whisper-api-stt.py index 1d5022674..741601c83 100644 --- 
a/examples/foundational/55i-update-settings-whisper-api-stt.py +++ b/examples/foundational/55i-update-settings-whisper-api-stt.py @@ -108,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating OpenAI STT settings: language="es"') - await task.queue_frame(STTUpdateSettingsFrame(update=BaseWhisperSTTSettings(language="es"))) + await task.queue_frame(STTUpdateSettingsFrame(delta=BaseWhisperSTTSettings(language="es"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55j-update-settings-sarvam-stt.py b/examples/foundational/55j-update-settings-sarvam-stt.py index e39c5cb5a..cab9656f8 100644 --- a/examples/foundational/55j-update-settings-sarvam-stt.py +++ b/examples/foundational/55j-update-settings-sarvam-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Sarvam STT settings: language=en-IN") await task.queue_frame( - STTUpdateSettingsFrame(update=SarvamSTTSettings(language=Language.EN_IN)) + STTUpdateSettingsFrame(delta=SarvamSTTSettings(language=Language.EN_IN)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55k-update-settings-soniox-stt.py b/examples/foundational/55k-update-settings-soniox-stt.py index 2cbcd44f4..85b5d2ba4 100644 --- a/examples/foundational/55k-update-settings-soniox-stt.py +++ b/examples/foundational/55k-update-settings-soniox-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Soniox STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=SonioxSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=SonioxSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git 
a/examples/foundational/55l-update-settings-aws-transcribe-stt.py b/examples/foundational/55l-update-settings-aws-transcribe-stt.py index 0f4c18981..3bfeb2faf 100644 --- a/examples/foundational/55l-update-settings-aws-transcribe-stt.py +++ b/examples/foundational/55l-update-settings-aws-transcribe-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AWS Transcribe STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=AWSTranscribeSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=AWSTranscribeSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55m-update-settings-cartesia-stt.py b/examples/foundational/55m-update-settings-cartesia-stt.py index 6ba27a85e..a87847a5a 100644 --- a/examples/foundational/55m-update-settings-cartesia-stt.py +++ b/examples/foundational/55m-update-settings-cartesia-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Cartesia STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=CartesiaSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=CartesiaSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55n-update-settings-cartesia-http-tts.py b/examples/foundational/55n-update-settings-cartesia-http-tts.py index 27cee5b8f..02d3bca2a 100644 --- a/examples/foundational/55n-update-settings-cartesia-http-tts.py +++ b/examples/foundational/55n-update-settings-cartesia-http-tts.py @@ -107,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating Cartesia HTTP TTS settings: speed increased to 1.5") await task.queue_frame( TTSUpdateSettingsFrame( - 
update=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) ) ) diff --git a/examples/foundational/55n-update-settings-cartesia-tts.py b/examples/foundational/55n-update-settings-cartesia-tts.py index 303c23a25..04e9d8fee 100644 --- a/examples/foundational/55n-update-settings-cartesia-tts.py +++ b/examples/foundational/55n-update-settings-cartesia-tts.py @@ -106,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating Cartesia TTS settings: speed increased to 1.5") await task.queue_frame( TTSUpdateSettingsFrame( - update=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) ) ) diff --git a/examples/foundational/55o-update-settings-elevenlabs-http-tts.py b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py index a67202702..2ca51730f 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-http-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py @@ -107,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating ElevenLabs TTS settings: speed=0.7") await task.queue_frame( - TTSUpdateSettingsFrame(update=ElevenLabsHttpTTSSettings(speed=0.7)) + TTSUpdateSettingsFrame(delta=ElevenLabsHttpTTSSettings(speed=0.7)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py index 3fefa1ffb..ddbfd8b8f 100644 --- a/examples/foundational/55o-update-settings-elevenlabs-tts.py +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -102,13 +102,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating ElevenLabs TTS settings: 
speed=0.7") - await task.queue_frame(TTSUpdateSettingsFrame(update=ElevenLabsTTSSettings(speed=0.7))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=ElevenLabsTTSSettings(speed=0.7))) await asyncio.sleep(10) logger.info("Updating ElevenLabs TTS settings: switching to a different voice") await task.queue_frame( TTSUpdateSettingsFrame( - update=ElevenLabsTTSSettings(voice=os.getenv("ELEVENLABS_VOICE_ID_ALT")) + delta=ElevenLabsTTSSettings(voice=os.getenv("ELEVENLABS_VOICE_ID_ALT")) ) ) diff --git a/examples/foundational/55p-update-settings-openai-tts.py b/examples/foundational/55p-update-settings-openai-tts.py index 5aef081fc..fcc24fb76 100644 --- a/examples/foundational/55p-update-settings-openai-tts.py +++ b/examples/foundational/55p-update-settings-openai-tts.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating OpenAI TTS settings: speed=2.0") - await task.queue_frame(TTSUpdateSettingsFrame(update=OpenAITTSSettings(speed=2.0))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=OpenAITTSSettings(speed=2.0))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55q-update-settings-deepgram-http-tts.py b/examples/foundational/55q-update-settings-deepgram-http-tts.py index 64bbea587..d94bf631a 100644 --- a/examples/foundational/55q-update-settings-deepgram-http-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-http-tts.py @@ -106,13 +106,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-aries-en")) + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en")) ) await asyncio.sleep(10) logger.info('Updating Deepgram TTS settings: 
voice="aura-2-luna-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py index 35fb7cebe..85087d0d2 100644 --- a/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py @@ -106,13 +106,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-aries-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramSageMakerTTSSettings(voice="aura-2-aries-en")) + TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-aries-en")) ) await asyncio.sleep(10) logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-luna-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramSageMakerTTSSettings(voice="aura-2-luna-en")) + TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-luna-en")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py b/examples/foundational/55q-update-settings-deepgram-tts.py index 9d94a50da..e205ffa73 100644 --- a/examples/foundational/55q-update-settings-deepgram-tts.py +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -99,13 +99,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-aries-en")) + 
TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en")) ) await asyncio.sleep(10) logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') await task.queue_frame( - TTSUpdateSettingsFrame(update=DeepgramTTSSettings(voice="aura-2-luna-en")) + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55r-update-settings-azure-http-tts.py b/examples/foundational/55r-update-settings-azure-http-tts.py index 3132580ed..0e4df5e7c 100644 --- a/examples/foundational/55r-update-settings-azure-http-tts.py +++ b/examples/foundational/55r-update-settings-azure-http-tts.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') await task.queue_frame( - TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="0.7", style="sad")) + TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55r-update-settings-azure-tts.py b/examples/foundational/55r-update-settings-azure-tts.py index d156eab43..a32dad5ed 100644 --- a/examples/foundational/55r-update-settings-azure-tts.py +++ b/examples/foundational/55r-update-settings-azure-tts.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') await task.queue_frame( - TTSUpdateSettingsFrame(update=AzureTTSSettings(rate="0.7", style="sad")) + TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55s-update-settings-google-http-tts.py b/examples/foundational/55s-update-settings-google-http-tts.py index 6c302411a..ae3070124 100644 --- 
a/examples/foundational/55s-update-settings-google-http-tts.py +++ b/examples/foundational/55s-update-settings-google-http-tts.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4") await task.queue_frame( - TTSUpdateSettingsFrame(update=GoogleHttpTTSSettings(speaking_rate=1.4)) + TTSUpdateSettingsFrame(delta=GoogleHttpTTSSettings(speaking_rate=1.4)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55s-update-settings-google-stream-tts.py b/examples/foundational/55s-update-settings-google-stream-tts.py index 42e07c64b..1aba64254 100644 --- a/examples/foundational/55s-update-settings-google-stream-tts.py +++ b/examples/foundational/55s-update-settings-google-stream-tts.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Google Stream TTS settings: speaking_rate=1.4") await task.queue_frame( - TTSUpdateSettingsFrame(update=GoogleStreamTTSSettings(speaking_rate=1.4)) + TTSUpdateSettingsFrame(delta=GoogleStreamTTSSettings(speaking_rate=1.4)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55t-update-settings-playht-tts.py b/examples/foundational/55t-update-settings-playht-tts.py index ec468a81c..d79120d99 100644 --- a/examples/foundational/55t-update-settings-playht-tts.py +++ b/examples/foundational/55t-update-settings-playht-tts.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating PlayHT TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=PlayHTTTSSettings(speed=1.3))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=PlayHTTTSSettings(speed=1.3))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, 
client): diff --git a/examples/foundational/55u-update-settings-rime-http-tts.py b/examples/foundational/55u-update-settings-rime-http-tts.py index 7b1c9b0fe..28e58ba08 100644 --- a/examples/foundational/55u-update-settings-rime-http-tts.py +++ b/examples/foundational/55u-update-settings-rime-http-tts.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Rime TTS settings: voice=rex") - await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(voice="rex"))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="rex"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55u-update-settings-rime-tts.py b/examples/foundational/55u-update-settings-rime-tts.py index 0704645f5..8992cb6db 100644 --- a/examples/foundational/55u-update-settings-rime-tts.py +++ b/examples/foundational/55u-update-settings-rime-tts.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Rime TTS settings: voice=bond") - await task.queue_frame(TTSUpdateSettingsFrame(update=RimeTTSSettings(voice="bond"))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="bond"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55v-update-settings-lmnt-tts.py b/examples/foundational/55v-update-settings-lmnt-tts.py index d98462e20..01bc15ddf 100644 --- a/examples/foundational/55v-update-settings-lmnt-tts.py +++ b/examples/foundational/55v-update-settings-lmnt-tts.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating LMNT TTS settings: voice="tyler"') - await 
task.queue_frame(TTSUpdateSettingsFrame(update=LmntTTSSettings(voice="tyler"))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=LmntTTSSettings(voice="tyler"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55w-update-settings-fish-tts.py b/examples/foundational/55w-update-settings-fish-tts.py index 82722ec34..72a2160ba 100644 --- a/examples/foundational/55w-update-settings-fish-tts.py +++ b/examples/foundational/55w-update-settings-fish-tts.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5") await task.queue_frame( - TTSUpdateSettingsFrame(update=FishAudioTTSSettings(prosody_speed=1.5)) + TTSUpdateSettingsFrame(delta=FishAudioTTSSettings(prosody_speed=1.5)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55x-update-settings-minimax-tts.py b/examples/foundational/55x-update-settings-minimax-tts.py index 306b8f2bd..fdb486415 100644 --- a/examples/foundational/55x-update-settings-minimax-tts.py +++ b/examples/foundational/55x-update-settings-minimax-tts.py @@ -105,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"') await task.queue_frame( - TTSUpdateSettingsFrame(update=MiniMaxTTSSettings(speed=1.5, emotion="happy")) + TTSUpdateSettingsFrame(delta=MiniMaxTTSSettings(speed=1.5, emotion="happy")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55y-update-settings-groq-tts.py b/examples/foundational/55y-update-settings-groq-tts.py index e6ce851c6..86dc1f98a 100644 --- a/examples/foundational/55y-update-settings-groq-tts.py +++ b/examples/foundational/55y-update-settings-groq-tts.py @@ -98,7 +98,7 @@ async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Groq TTS settings: speed=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=GroqTTSSettings(speed=1.5))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=GroqTTSSettings(speed=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55z-update-settings-hume-tts.py b/examples/foundational/55z-update-settings-hume-tts.py index 427b99bab..493550469 100644 --- a/examples/foundational/55z-update-settings-hume-tts.py +++ b/examples/foundational/55z-update-settings-hume-tts.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info('Updating Hume TTS settings: speed=2.0, description="Speak with excitement"') await task.queue_frame( TTSUpdateSettingsFrame( - update=HumeTTSSettings(speed=2.0, description="Speak with excitement") + delta=HumeTTSSettings(speed=2.0, description="Speak with excitement") ) ) diff --git a/examples/foundational/55za-update-settings-neuphonic-http-tts.py b/examples/foundational/55za-update-settings-neuphonic-http-tts.py index 056b32349..6e1d18e4a 100644 --- a/examples/foundational/55za-update-settings-neuphonic-http-tts.py +++ b/examples/foundational/55za-update-settings-neuphonic-http-tts.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Neuphonic HTTP TTS settings: speed=1.4") - await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55za-update-settings-neuphonic-tts.py b/examples/foundational/55za-update-settings-neuphonic-tts.py index 187594c7e..861167a20 
100644 --- a/examples/foundational/55za-update-settings-neuphonic-tts.py +++ b/examples/foundational/55za-update-settings-neuphonic-tts.py @@ -98,7 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Neuphonic TTS settings: speed=1.4") - await task.queue_frame(TTSUpdateSettingsFrame(update=NeuphonicTTSSettings(speed=1.4))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zb-update-settings-inworld-http-tts.py b/examples/foundational/55zb-update-settings-inworld-http-tts.py index 933a27013..99353b87f 100644 --- a/examples/foundational/55zb-update-settings-inworld-http-tts.py +++ b/examples/foundational/55zb-update-settings-inworld-http-tts.py @@ -103,9 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8") await task.queue_frame( - TTSUpdateSettingsFrame( - update=InworldTTSSettings(speaking_rate=1.5, temperature=0.8) - ) + TTSUpdateSettingsFrame(delta=InworldTTSSettings(speaking_rate=1.5, temperature=0.8)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zb-update-settings-inworld-tts.py b/examples/foundational/55zb-update-settings-inworld-tts.py index f8a66bdd8..104001c15 100644 --- a/examples/foundational/55zb-update-settings-inworld-tts.py +++ b/examples/foundational/55zb-update-settings-inworld-tts.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8") await task.queue_frame( - TTSUpdateSettingsFrame(update=InworldTTSSettings(speaking_rate=1.5, temperature=0.8)) + 
TTSUpdateSettingsFrame(delta=InworldTTSSettings(speaking_rate=1.5, temperature=0.8)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zc-update-settings-gemini-tts.py b/examples/foundational/55zc-update-settings-gemini-tts.py index 6af28e69f..21b678047 100644 --- a/examples/foundational/55zc-update-settings-gemini-tts.py +++ b/examples/foundational/55zc-update-settings-gemini-tts.py @@ -108,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Gemini TTS settings: prompt="Speak slowly and dramatically"') await task.queue_frame( - TTSUpdateSettingsFrame(update=GeminiTTSSettings(prompt="Speak slowly and dramatically")) + TTSUpdateSettingsFrame(delta=GeminiTTSSettings(prompt="Speak slowly and dramatically")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zd-update-settings-aws-polly-tts.py b/examples/foundational/55zd-update-settings-aws-polly-tts.py index 3d9f72cf4..4392e7b6f 100644 --- a/examples/foundational/55zd-update-settings-aws-polly-tts.py +++ b/examples/foundational/55zd-update-settings-aws-polly-tts.py @@ -98,7 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating AWS Polly TTS settings: rate="fast"') - await task.queue_frame(TTSUpdateSettingsFrame(update=AWSPollyTTSSettings(rate="fast"))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=AWSPollyTTSSettings(rate="fast"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55ze-update-settings-sarvam-http-tts.py b/examples/foundational/55ze-update-settings-sarvam-http-tts.py index 0afce361a..7832a805a 100644 --- a/examples/foundational/55ze-update-settings-sarvam-http-tts.py +++ b/examples/foundational/55ze-update-settings-sarvam-http-tts.py @@ -102,7 +102,7 @@ async def 
run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Sarvam TTS settings: pace=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamHttpTTSSettings(pace=1.5))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=SarvamHttpTTSSettings(pace=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55ze-update-settings-sarvam-tts.py b/examples/foundational/55ze-update-settings-sarvam-tts.py index 98408c4b8..e63c6046d 100644 --- a/examples/foundational/55ze-update-settings-sarvam-tts.py +++ b/examples/foundational/55ze-update-settings-sarvam-tts.py @@ -98,7 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Sarvam TTS settings: pace=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(update=SarvamTTSSettings(pace=1.5))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=SarvamTTSSettings(pace=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zf-update-settings-camb-tts.py b/examples/foundational/55zf-update-settings-camb-tts.py index 1fe758849..82cc4a638 100644 --- a/examples/foundational/55zf-update-settings-camb-tts.py +++ b/examples/foundational/55zf-update-settings-camb-tts.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Camb TTS settings: language -> Spanish") - await task.queue_frame(TTSUpdateSettingsFrame(update=CambTTSSettings(language=Language.ES))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=CambTTSSettings(language=Language.ES))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zg-update-settings-hathora-tts.py 
b/examples/foundational/55zg-update-settings-hathora-tts.py index 363ac7d85..9f6b6bd0a 100644 --- a/examples/foundational/55zg-update-settings-hathora-tts.py +++ b/examples/foundational/55zg-update-settings-hathora-tts.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Hathora TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(update=HathoraTTSSettings(speed=1.3))) + await task.queue_frame(TTSUpdateSettingsFrame(delta=HathoraTTSSettings(speed=1.3))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zh-update-settings-resembleai-tts.py b/examples/foundational/55zh-update-settings-resembleai-tts.py index 39b745500..44688ee25 100644 --- a/examples/foundational/55zh-update-settings-resembleai-tts.py +++ b/examples/foundational/55zh-update-settings-resembleai-tts.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating ResembleAI TTS settings: voice (changed)") await task.queue_frame( TTSUpdateSettingsFrame( - update=ResembleAITTSSettings(voice=os.getenv("RESEMBLE_VOICE_UUID_ALT")) + delta=ResembleAITTSSettings(voice=os.getenv("RESEMBLE_VOICE_UUID_ALT")) ) ) diff --git a/examples/foundational/55zi-update-settings-azure-llm.py b/examples/foundational/55zi-update-settings-azure-llm.py index 94cb723e3..43161b103 100644 --- a/examples/foundational/55zi-update-settings-azure-llm.py +++ b/examples/foundational/55zi-update-settings-azure-llm.py @@ -106,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Azure LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) 
@transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zi-update-settings-openai-llm.py b/examples/foundational/55zi-update-settings-openai-llm.py index a8c253bc2..d84259cc3 100644 --- a/examples/foundational/55zi-update-settings-openai-llm.py +++ b/examples/foundational/55zi-update-settings-openai-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating OpenAI LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zj-update-settings-anthropic-llm.py b/examples/foundational/55zj-update-settings-anthropic-llm.py index 4c8341a6a..354702880 100644 --- a/examples/foundational/55zj-update-settings-anthropic-llm.py +++ b/examples/foundational/55zj-update-settings-anthropic-llm.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Anthropic LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=AnthropicLLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=AnthropicLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zk-update-settings-google-llm.py b/examples/foundational/55zk-update-settings-google-llm.py index 140c0fccb..cd03a34cb 100644 --- a/examples/foundational/55zk-update-settings-google-llm.py +++ b/examples/foundational/55zk-update-settings-google-llm.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, 
runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Google LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=GoogleLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zk-update-settings-google-vertex-llm.py b/examples/foundational/55zk-update-settings-google-vertex-llm.py index 41c0b8a37..3feba582f 100644 --- a/examples/foundational/55zk-update-settings-google-vertex-llm.py +++ b/examples/foundational/55zk-update-settings-google-vertex-llm.py @@ -106,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Google Vertex LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=GoogleLLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=GoogleLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zl-update-settings-azure-realtime.py b/examples/foundational/55zl-update-settings-azure-realtime.py index b8f049db0..247bde14b 100644 --- a/examples/foundational/55zl-update-settings-azure-realtime.py +++ b/examples/foundational/55zl-update-settings-azure-realtime.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating Azure Realtime LLM settings: output_modalities=['text']") await task.queue_frame( LLMUpdateSettingsFrame( - update=OpenAIRealtimeLLMSettings( + delta=OpenAIRealtimeLLMSettings( session_properties=events.SessionProperties(output_modalities=["text"]) ) ) @@ -112,7 +112,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating Azure 
Realtime LLM settings: output_modalities=['audio']") await task.queue_frame( LLMUpdateSettingsFrame( - update=OpenAIRealtimeLLMSettings( + delta=OpenAIRealtimeLLMSettings( session_properties=events.SessionProperties(output_modalities=["audio"]) ) ) diff --git a/examples/foundational/55zl-update-settings-openai-realtime.py b/examples/foundational/55zl-update-settings-openai-realtime.py index 9c18d528e..f5c4afa26 100644 --- a/examples/foundational/55zl-update-settings-openai-realtime.py +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['text']") await task.queue_frame( LLMUpdateSettingsFrame( - update=OpenAIRealtimeLLMSettings( + delta=OpenAIRealtimeLLMSettings( session_properties=events.SessionProperties(output_modalities=["text"]) ) ) @@ -111,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['audio']") await task.queue_frame( LLMUpdateSettingsFrame( - update=OpenAIRealtimeLLMSettings( + delta=OpenAIRealtimeLLMSettings( session_properties=events.SessionProperties(output_modalities=["audio"]) ) ) diff --git a/examples/foundational/55zm-update-settings-gemini-live-vertex.py b/examples/foundational/55zm-update-settings-gemini-live-vertex.py index 575fbe090..96bd7a1c6 100644 --- a/examples/foundational/55zm-update-settings-gemini-live-vertex.py +++ b/examples/foundational/55zm-update-settings-gemini-live-vertex.py @@ -91,9 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Gemini Live Vertex LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) - ) + await 
task.queue_frame(LLMUpdateSettingsFrame(delta=GeminiLiveLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zm-update-settings-gemini-live.py b/examples/foundational/55zm-update-settings-gemini-live.py index 8ad635fd5..a00343ac3 100644 --- a/examples/foundational/55zm-update-settings-gemini-live.py +++ b/examples/foundational/55zm-update-settings-gemini-live.py @@ -89,9 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Gemini Live LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=GeminiLiveLLMSettings(temperature=0.1)) - ) + await task.queue_frame(LLMUpdateSettingsFrame(delta=GeminiLiveLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zn-update-settings-ultravox-realtime.py b/examples/foundational/55zn-update-settings-ultravox-realtime.py index 967d40741..5bcbded6b 100644 --- a/examples/foundational/55zn-update-settings-ultravox-realtime.py +++ b/examples/foundational/55zn-update-settings-ultravox-realtime.py @@ -112,13 +112,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Ultravox Realtime LLM settings: output_medium=text") await task.queue_frame( - LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(output_medium="text")) + LLMUpdateSettingsFrame(delta=UltravoxRealtimeLLMSettings(output_medium="text")) ) await asyncio.sleep(10) logger.info("Updating Ultravox Realtime LLM settings: output_medium=voice") await task.queue_frame( - LLMUpdateSettingsFrame(update=UltravoxRealtimeLLMSettings(output_medium="voice")) + LLMUpdateSettingsFrame(delta=UltravoxRealtimeLLMSettings(output_medium="voice")) ) 
@transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zo-update-settings-grok-realtime.py b/examples/foundational/55zo-update-settings-grok-realtime.py index 7d7370f7b..9444f126a 100644 --- a/examples/foundational/55zo-update-settings-grok-realtime.py +++ b/examples/foundational/55zo-update-settings-grok-realtime.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info("Updating Grok Realtime LLM settings: voice='Rex'") await task.queue_frame( LLMUpdateSettingsFrame( - update=GrokRealtimeLLMSettings( + delta=GrokRealtimeLLMSettings( session_properties=events.SessionProperties(voice="Rex") ) ) diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py index 1c2781e72..3d3ee8fb5 100644 --- a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -105,9 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AWS Bedrock LLM settings: temperature=0.1") - await task.queue_frame( - LLMUpdateSettingsFrame(update=AWSBedrockLLMSettings(temperature=0.1)) - ) + await task.queue_frame(LLMUpdateSettingsFrame(delta=AWSBedrockLLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zq-update-settings-fal-stt.py b/examples/foundational/55zq-update-settings-fal-stt.py index 9792961f2..c0f0a134a 100644 --- a/examples/foundational/55zq-update-settings-fal-stt.py +++ b/examples/foundational/55zq-update-settings-fal-stt.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Fal STT settings: task="translate"') - await 
task.queue_frame(STTUpdateSettingsFrame(update=FalSTTSettings(task="translate"))) + await task.queue_frame(STTUpdateSettingsFrame(delta=FalSTTSettings(task="translate"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zr-update-settings-gradium-stt.py b/examples/foundational/55zr-update-settings-gradium-stt.py index 6a1a25c3c..636d27bd8 100644 --- a/examples/foundational/55zr-update-settings-gradium-stt.py +++ b/examples/foundational/55zr-update-settings-gradium-stt.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Gradium STT settings: delay_in_frames=5") - await task.queue_frame(STTUpdateSettingsFrame(update=GradiumSTTSettings(delay_in_frames=5))) + await task.queue_frame(STTUpdateSettingsFrame(delta=GradiumSTTSettings(delay_in_frames=5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zs-update-settings-hathora-stt.py b/examples/foundational/55zs-update-settings-hathora-stt.py index f3aca9c89..db5ed4d2a 100644 --- a/examples/foundational/55zs-update-settings-hathora-stt.py +++ b/examples/foundational/55zs-update-settings-hathora-stt.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Hathora STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=HathoraSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=HathoraSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py index 624da149e..60a042c5f 100644 --- a/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py +++ 
b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating NVIDIA Segmented STT settings: profanity_filter=True") await task.queue_frame( - STTUpdateSettingsFrame(update=NvidiaSegmentedSTTSettings(profanity_filter=True)) + STTUpdateSettingsFrame(delta=NvidiaSegmentedSTTSettings(profanity_filter=True)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zt-update-settings-nvidia-stt.py b/examples/foundational/55zt-update-settings-nvidia-stt.py index 0e7b6a74a..415f10b12 100644 --- a/examples/foundational/55zt-update-settings-nvidia-stt.py +++ b/examples/foundational/55zt-update-settings-nvidia-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating NVIDIA STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=NvidiaSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=NvidiaSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zu-update-settings-openai-realtime-stt.py b/examples/foundational/55zu-update-settings-openai-realtime-stt.py index 1f1592df7..2bcd35f52 100644 --- a/examples/foundational/55zu-update-settings-openai-realtime-stt.py +++ b/examples/foundational/55zu-update-settings-openai-realtime-stt.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating OpenAI Realtime STT settings: language=es") await task.queue_frame( - STTUpdateSettingsFrame(update=OpenAIRealtimeSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame(delta=OpenAIRealtimeSTTSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git 
a/examples/foundational/55zv-update-settings-asyncai-http-tts.py b/examples/foundational/55zv-update-settings-asyncai-http-tts.py index 206a80eed..9688f1bac 100644 --- a/examples/foundational/55zv-update-settings-asyncai-http-tts.py +++ b/examples/foundational/55zv-update-settings-asyncai-http-tts.py @@ -108,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AsyncAI HTTP TTS settings: language=es") await task.queue_frame( - TTSUpdateSettingsFrame(update=AsyncAITTSSettings(language=Language.ES)) + TTSUpdateSettingsFrame(delta=AsyncAITTSSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zv-update-settings-asyncai-tts.py b/examples/foundational/55zv-update-settings-asyncai-tts.py index f910e5fe3..fe096b4be 100644 --- a/examples/foundational/55zv-update-settings-asyncai-tts.py +++ b/examples/foundational/55zv-update-settings-asyncai-tts.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AsyncAI TTS settings: language=es") await task.queue_frame( - TTSUpdateSettingsFrame(update=AsyncAITTSSettings(language=Language.ES)) + TTSUpdateSettingsFrame(delta=AsyncAITTSSettings(language=Language.ES)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zw-update-settings-gradium-tts.py b/examples/foundational/55zw-update-settings-gradium-tts.py index 39090d5fa..d1069bfa4 100644 --- a/examples/foundational/55zw-update-settings-gradium-tts.py +++ b/examples/foundational/55zw-update-settings-gradium-tts.py @@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Gradium TTS settings: voice="LFZvm12tW_z0xfGo"') await task.queue_frame( - TTSUpdateSettingsFrame(update=GradiumTTSSettings(voice="LFZvm12tW_z0xfGo")) + 
TTSUpdateSettingsFrame(delta=GradiumTTSSettings(voice="LFZvm12tW_z0xfGo")) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zx-update-settings-cerebras-llm.py b/examples/foundational/55zx-update-settings-cerebras-llm.py index 72aa8518d..6123487a3 100644 --- a/examples/foundational/55zx-update-settings-cerebras-llm.py +++ b/examples/foundational/55zx-update-settings-cerebras-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Cerebras LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zy-update-settings-deepseek-llm.py b/examples/foundational/55zy-update-settings-deepseek-llm.py index de4e4149e..60cbab30b 100644 --- a/examples/foundational/55zy-update-settings-deepseek-llm.py +++ b/examples/foundational/55zy-update-settings-deepseek-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating DeepSeek LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zz-update-settings-fireworks-llm.py b/examples/foundational/55zz-update-settings-fireworks-llm.py index d864cacb2..97554ae19 100644 --- a/examples/foundational/55zz-update-settings-fireworks-llm.py +++ b/examples/foundational/55zz-update-settings-fireworks-llm.py @@ -105,7 +105,7 @@ async def 
run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Fireworks LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zza-update-settings-grok-llm.py b/examples/foundational/55zza-update-settings-grok-llm.py index dbf07f21d..8ce081e66 100644 --- a/examples/foundational/55zza-update-settings-grok-llm.py +++ b/examples/foundational/55zza-update-settings-grok-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Grok LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzb-update-settings-groq-llm.py b/examples/foundational/55zzb-update-settings-groq-llm.py index 8244f611a..afde4499d 100644 --- a/examples/foundational/55zzb-update-settings-groq-llm.py +++ b/examples/foundational/55zzb-update-settings-groq-llm.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Groq LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git 
a/examples/foundational/55zzc-update-settings-mistral-llm.py b/examples/foundational/55zzc-update-settings-mistral-llm.py index 642eda3c5..7eba98e97 100644 --- a/examples/foundational/55zzc-update-settings-mistral-llm.py +++ b/examples/foundational/55zzc-update-settings-mistral-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Mistral LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzd-update-settings-nvidia-llm.py b/examples/foundational/55zzd-update-settings-nvidia-llm.py index 5ffa0ff23..ee57a3a24 100644 --- a/examples/foundational/55zzd-update-settings-nvidia-llm.py +++ b/examples/foundational/55zzd-update-settings-nvidia-llm.py @@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating NVIDIA LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zze-update-settings-ollama-llm.py b/examples/foundational/55zze-update-settings-ollama-llm.py index ca3714943..e22719ec1 100644 --- a/examples/foundational/55zze-update-settings-ollama-llm.py +++ b/examples/foundational/55zze-update-settings-ollama-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating OLLama LLM settings: temperature=0.1") - await 
task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzf-update-settings-openrouter-llm.py b/examples/foundational/55zzf-update-settings-openrouter-llm.py index 90606a572..fc3732192 100644 --- a/examples/foundational/55zzf-update-settings-openrouter-llm.py +++ b/examples/foundational/55zzf-update-settings-openrouter-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating OpenRouter LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzg-update-settings-perplexity-llm.py b/examples/foundational/55zzg-update-settings-perplexity-llm.py index 771b1c794..f55975685 100644 --- a/examples/foundational/55zzg-update-settings-perplexity-llm.py +++ b/examples/foundational/55zzg-update-settings-perplexity-llm.py @@ -101,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Perplexity LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzh-update-settings-qwen-llm.py b/examples/foundational/55zzh-update-settings-qwen-llm.py index 81ace2117..f31dc05a5 100644 
--- a/examples/foundational/55zzh-update-settings-qwen-llm.py +++ b/examples/foundational/55zzh-update-settings-qwen-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Qwen LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzi-update-settings-sambanova-llm.py b/examples/foundational/55zzi-update-settings-sambanova-llm.py index 82382a6bd..96122cc03 100644 --- a/examples/foundational/55zzi-update-settings-sambanova-llm.py +++ b/examples/foundational/55zzi-update-settings-sambanova-llm.py @@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating SambaNova LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzj-update-settings-together-llm.py b/examples/foundational/55zzj-update-settings-together-llm.py index 1f0a0557f..710ef894a 100644 --- a/examples/foundational/55zzj-update-settings-together-llm.py +++ b/examples/foundational/55zzj-update-settings-together-llm.py @@ -105,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating Together LLM settings: temperature=0.1") - await task.queue_frame(LLMUpdateSettingsFrame(update=OpenAILLMSettings(temperature=0.1))) + await 
task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py index 1faafdbac..301270797 100644 --- a/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py +++ b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py @@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info("Updating AWS Nova Sonic LLM settings: temperature=0.1") await task.queue_frame( - LLMUpdateSettingsFrame(update=AWSNovaSonicLLMSettings(temperature=0.1)) + LLMUpdateSettingsFrame(delta=AWSNovaSonicLLMSettings(temperature=0.1)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zzl-update-settings-nvidia-tts.py b/examples/foundational/55zzl-update-settings-nvidia-tts.py index b92651496..a8bd50dcd 100644 --- a/examples/foundational/55zzl-update-settings-nvidia-tts.py +++ b/examples/foundational/55zzl-update-settings-nvidia-tts.py @@ -100,7 +100,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating NVIDIA TTS settings: language="ES_US"') await task.queue_frame( - TTSUpdateSettingsFrame(update=NvidiaTTSSettings(language=Language.ES_US)) + TTSUpdateSettingsFrame(delta=NvidiaTTSSettings(language=Language.ES_US)) ) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/55zzm-update-settings-speechmatics-tts.py b/examples/foundational/55zzm-update-settings-speechmatics-tts.py index 36b66fe53..39ed792dd 100644 --- a/examples/foundational/55zzm-update-settings-speechmatics-tts.py +++ b/examples/foundational/55zzm-update-settings-speechmatics-tts.py @@ -104,7 +104,7 @@ async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): await asyncio.sleep(10) logger.info('Updating Speechmatics TTS settings: voice="theo"') await task.queue_frame( - TTSUpdateSettingsFrame(update=SpeechmaticsTTSSettings(voice="theo")) + TTSUpdateSettingsFrame(delta=SpeechmaticsTTSSettings(voice="theo")) ) @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 79aedb771..0d6b3f18a 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2121,21 +2121,21 @@ class TTSStoppedFrame(ControlFrame): class ServiceUpdateSettingsFrame(ControlFrame): """Base frame for updating service settings. - Supports both a ``settings`` dict (for backward compatibility) and an - ``update`` object. When both are provided, ``update`` takes precedence. + Supports both a ``settings`` dict (for backward compatibility) and a + ``delta`` object. When both are provided, ``delta`` takes precedence. Parameters: settings: Dictionary of setting name to value mappings. .. deprecated:: 0.0.103 - Use ``update`` with a typed settings object instead. + Use ``delta`` with a typed settings object instead. - update: :class:`~pipecat.services.settings.ServiceSettings` object - describing the delta to apply. + delta: :class:`~pipecat.services.settings.ServiceSettings` delta-mode + object describing the fields to change. """ settings: Mapping[str, Any] = field(default_factory=dict) - update: Optional["ServiceSettings"] = None + delta: Optional["ServiceSettings"] = None @dataclass diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index 8b32d7222..f092c2b49 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -42,7 +42,7 @@ class AIService(FrameProcessor): **kwargs: Additional arguments passed to the parent FrameProcessor. 
""" super().__init__(**kwargs) - self._settings: ServiceSettings = ServiceSettings(model="") + self._settings: ServiceSettings = ServiceSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._session_properties: Dict[str, Any] = {} self._tracing_enabled: bool = False self._tracing_context = None @@ -73,6 +73,7 @@ class AIService(FrameProcessor): Args: frame: The start frame containing initialization parameters. """ + self._settings.validate_complete() self._tracing_enabled = frame.enable_tracing self._tracing_context = frame.tracing_context @@ -98,10 +99,10 @@ class AIService(FrameProcessor): """ pass - async def _update_settings(self, update: ServiceSettings) -> Dict[str, Any]: - """Apply a settings update and return the changed fields. + async def _update_settings(self, delta: ServiceSettings) -> Dict[str, Any]: + """Apply a settings delta and return the changed fields. - The update is applied to ``_settings`` and a dict mapping each changed + The delta is applied to ``_settings`` and a dict mapping each changed field name to its **pre-update** value is returned. The ``model`` field is handled specially: when it changes, ``set_model_name`` is called. @@ -110,12 +111,12 @@ class AIService(FrameProcessor): to react to specific changed fields (e.g. reconnect on voice change). Args: - update: A settings delta. + delta: A delta-mode settings object. Returns: Dict mapping changed field names to their previous values. 
""" - changed = self._settings.apply_update(update) + changed = self._settings.apply_update(delta) if "model" in changed: self._sync_model_name_to_metrics() diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index e79792bb0..047159515 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -254,6 +254,11 @@ class AnthropicLLMService(LLMService): temperature=params.temperature, top_k=params.top_k, top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, thinking=params.thinking, extra=params.extra if isinstance(params.extra, dict) else {}, ) diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 6a33b6a20..44ae123b7 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -116,6 +116,7 @@ class AssemblyAISTTService(WebsocketSTTService): self._api_key = api_key self._settings = AssemblyAISTTSettings( + model=None, language=language, connection_params=connection_params, ) @@ -186,18 +187,18 @@ class AssemblyAISTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. Args: - update: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. + delta: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 289c327b0..d55062c4f 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -176,12 +176,12 @@ class AsyncAITTSService(AudioContextTTSService): self._receive_task = None self._keepalive_task = None - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index de994463b..34e869c69 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -827,6 +827,12 @@ class AWSBedrockLLMService(LLMService): max_tokens=params.max_tokens, temperature=params.temperature, top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, latency=params.latency, additional_model_request_fields=params.additional_model_request_fields if isinstance(params.additional_model_request_fields, dict) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index eba5cc21b..e51a1842c 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -267,6 +267,12 @@ class AWSNovaSonicLLMService(LLMService): temperature=params.temperature, max_tokens=params.max_tokens, top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + 
filter_incomplete_user_turns=False, + user_turn_completion_config=None, endpointing_sensitivity=params.endpointing_sensitivity, ) self._sync_model_name_to_metrics() @@ -338,12 +344,12 @@ class AWSNovaSonicLLMService(LLMService): # settings # - async def _update_settings(self, update: AWSNovaSonicLLMSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: AWSNovaSonicLLMSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index c53e3648c..1d8ae84f5 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -148,12 +148,12 @@ class AWSTranscribeSTTService(WebsocketSTTService): } return encoding_map.get(encoding, encoding) - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 4e071465d..8b06ad7e9 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -209,6 +209,7 @@ class AWSPollyTTSService(TTSService): self._aws_session = aioboto3.Session() self._settings = AWSPollyTTSSettings( + model=None, voice=voice_id, engine=params.engine, language=self.language_to_service_language(params.language) diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index d161b3829..096f236a6 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -110,6 +110,7 @@ class AzureSTTService(STTService): self._audio_stream = None self._speech_recognizer = None self._settings = AzureSTTSettings( + model=None, region=region, language=language_to_azure_language(language), sample_rate=sample_rate, @@ -134,12 +135,12 @@ class AzureSTTService(STTService): """ return language_to_azure_language(language) - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active recognizer. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) # TODO: someday we could reconnect here to apply updated settings. 
# Code might look something like the below: diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 7672a846c..3a176055d 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -156,6 +156,7 @@ class AzureBaseTTSService: params = params or AzureBaseTTSService.InputParams() self._settings = AzureTTSSettings( + model=None, emphasis=params.emphasis, language=self.language_to_service_language(params.language) if params.language diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 6a30f9a53..20ff04963 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -294,16 +294,16 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. Args: - update: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) # TODO: someday we could reconnect here to apply updated settings. 
# Code might look something like the below: diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 3f6fe2c21..f31cc2421 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -443,7 +443,7 @@ class CartesiaTTSService(AudioContextTTSService): voice_config["mode"] = "id" voice_config["id"] = self._settings.voice - if is_given(self._settings.emotion) and self._settings.emotion: + if self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -469,18 +469,18 @@ class CartesiaTTSService(AudioContextTTSService): "use_original_timestamps": False if self._settings.model == "sonic" else True, } - if is_given(self._settings.language) and self._settings.language: + if self._settings.language: msg["language"] = self._settings.language - if is_given(self._settings.speed) and self._settings.speed: + if self._settings.speed: msg["speed"] = self._settings.speed - if is_given(self._settings.generation_config) and self._settings.generation_config: + if self._settings.generation_config: msg["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if is_given(self._settings.pronunciation_dict_id) and self._settings.pronunciation_dict_id: + if self._settings.pronunciation_dict_id: msg["pronunciation_dict_id"] = self._settings.pronunciation_dict_id return json.dumps(msg) @@ -811,7 +811,7 @@ class 
CartesiaHttpTTSService(TTSService): try: voice_config = {"mode": "id", "id": self._settings.voice} - if is_given(self._settings.emotion) and self._settings.emotion: + if self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -836,21 +836,18 @@ class CartesiaHttpTTSService(TTSService): "output_format": output_format, } - if is_given(self._settings.language) and self._settings.language: + if self._settings.language: payload["language"] = self._settings.language - if is_given(self._settings.speed) and self._settings.speed: + if self._settings.speed: payload["speed"] = self._settings.speed - if is_given(self._settings.generation_config) and self._settings.generation_config: + if self._settings.generation_config: payload["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if ( - is_given(self._settings.pronunciation_dict_id) - and self._settings.pronunciation_dict_id - ): + if self._settings.pronunciation_dict_id: payload["pronunciation_dict_id"] = self._settings.pronunciation_dict_id yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index 410d272da..f0018d5c8 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -384,12 +384,12 @@ class DeepgramFluxSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: DeepgramFluxSTTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: DeepgramFluxSTTSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 28b8a211e..0792ea3c9 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -206,25 +206,25 @@ class DeepgramSTTService(STTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, keeping ``live_options`` in sync. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When - they are given in *update* their values are propagated into + they are given in *delta* their values are propagated into ``live_options``. When only ``live_options`` is given, its ``model`` and ``language`` are propagated *up* to the top-level fields. Any change triggers a WebSocket reconnect. """ # Determine which top-level fields are explicitly provided. 
- model_given = isinstance(update, DeepgramSTTSettings) and is_given( - getattr(update, "model", NOT_GIVEN) + model_given = isinstance(delta, DeepgramSTTSettings) and is_given( + getattr(delta, "model", NOT_GIVEN) ) - language_given = isinstance(update, DeepgramSTTSettings) and is_given( - getattr(update, "language", NOT_GIVEN) + language_given = isinstance(delta, DeepgramSTTSettings) and is_given( + getattr(delta, "language", NOT_GIVEN) ) - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 64bb2ba8f..3820f8d84 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -163,25 +163,25 @@ class DeepgramSageMakerSTTService(STTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, keeping ``live_options`` in sync. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, keeping ``live_options`` in sync. Top-level ``model`` and ``language`` are the source of truth. When - they are given in *update* their values are propagated into + they are given in *delta* their values are propagated into ``live_options``. When only ``live_options`` is given, its ``model`` and ``language`` are propagated *up* to the top-level fields. Any change triggers a reconnect. """ # Determine which top-level fields are explicitly provided. 
- model_given = isinstance(update, DeepgramSageMakerSTTSettings) and is_given( - getattr(update, "model", NOT_GIVEN) + model_given = isinstance(delta, DeepgramSageMakerSTTSettings) and is_given( + getattr(delta, "model", NOT_GIVEN) ) - language_given = isinstance(update, DeepgramSageMakerSTTSettings) and is_given( - getattr(update, "language", NOT_GIVEN) + language_given = isinstance(delta, DeepgramSageMakerSTTSettings) and is_given( + getattr(delta, "language", NOT_GIVEN) ) - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 9216d7aa7..b3973bba2 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -109,6 +109,7 @@ class DeepgramTTSService(WebsocketTTSService): self._settings = DeepgramTTSSettings( model=voice, voice=voice, + language=None, encoding=encoding, ) self._sync_model_name_to_metrics() @@ -183,16 +184,16 @@ class DeepgramTTSService(WebsocketTTSService): await self._disconnect_websocket() - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. Args: - update: A :class:`TTSSettings` (or ``DeepgramTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``DeepgramTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) # Deepgram uses voice as the model, so keep them in sync for metrics if "voice" in changed: @@ -401,6 +402,7 @@ class DeepgramHttpTTSService(TTSService): self._settings = DeepgramTTSSettings( model=voice, voice=voice, + language=None, encoding=encoding, ) self._sync_model_name_to_metrics() diff --git a/src/pipecat/services/deepgram/tts_sagemaker.py b/src/pipecat/services/deepgram/tts_sagemaker.py index 8447c96f0..798a62bf8 100644 --- a/src/pipecat/services/deepgram/tts_sagemaker.py +++ b/src/pipecat/services/deepgram/tts_sagemaker.py @@ -107,6 +107,7 @@ class DeepgramSageMakerTTSService(TTSService): self._settings = DeepgramSageMakerTTSSettings( model=voice, voice=voice, + language=None, encoding=encoding, ) self._sync_model_name_to_metrics() @@ -220,13 +221,13 @@ class DeepgramSageMakerTTSService(TTSService): logger.debug("Disconnected from Deepgram TTS on SageMaker") await self._call_event_handler("on_disconnected") - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if necessary. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. Since all settings are part of the SageMaker session query string, any setting change requires reconnecting to apply the new values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 5ff91f597..7a821304d 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -302,19 +302,19 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. Converts language to ElevenLabs format before applying and keeps ``_model_id`` in sync with the model setting. Args: - update: A :class:`STTSettings` (or ``ElevenLabsSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``ElevenLabsSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if "model" in changed: self._model_id = self._settings.model @@ -541,19 +541,19 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if anything changed. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if anything changed. Converts language to ElevenLabs format before applying and keeps ``_model_id`` in sync. Args: - update: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 8d51e9dde..20d46481d 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -44,7 +44,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( AudioContextTTSService, TTSService, @@ -166,7 +166,7 @@ def build_elevenlabs_voice_settings( val = ( getattr(settings, key, None) if isinstance(settings, TTSSettings) else settings.get(key) ) - if val is not None and is_given(val): + if val is not None: voice_settings[key] = val return voice_settings or None @@ -470,24 +470,24 @@ class ElevenLabsTTSService(AudioContextTTSService): voice_settings = {} for key in voice_setting_keys: val = getattr(ts, key, None) - if val is not None and is_given(val): + if val is not None: voice_settings[key] = val return voice_settings or None - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update, reconnecting as needed. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta, reconnecting as needed. Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS`` sets on :class:`ElevenLabsTTSSettings` to decide whether to reconnect the WebSocket or close the current audio context. Args: - update: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed @@ -967,16 +967,16 @@ class ElevenLabsHttpTTSService(TTSService): def _set_voice_settings(self): return build_elevenlabs_voice_settings(self._settings) - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and rebuild voice settings. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and rebuild voice settings. Args: - update: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: self._voice_settings = self._set_voice_settings() return changed @@ -1116,10 +1116,7 @@ class ElevenLabsHttpTTSService(TTSService): locator.model_dump() for locator in self._pronunciation_dictionary_locators ] - if ( - is_given(self._settings.apply_text_normalization) - and self._settings.apply_text_normalization is not None - ): + if self._settings.apply_text_normalization is not None: payload["apply_text_normalization"] = self._settings.apply_text_normalization language = self._settings.language @@ -1140,10 +1137,7 @@ class ElevenLabsHttpTTSService(TTSService): params = { "output_format": self._output_format, } - if ( - is_given(self._settings.optimize_streaming_latency) - and self._settings.optimize_streaming_latency is not None - ): + if self._settings.optimize_streaming_latency is not None: params["optimize_streaming_latency"] = self._settings.optimize_streaming_latency try: diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 91b5a25c8..923c3c2ea 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -224,6 +224,7 @@ class 
FalSTTService(SegmentedSTTService): self._fal_client = fal_client.AsyncClient(key=api_key or os.getenv("FAL_KEY")) self._settings = FalSTTSettings( + model=None, language=self.language_to_service_language(params.language) if params.language else "en", diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 0d84ea7ab..1927b6cac 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -196,18 +196,18 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if needed. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if needed. Any change to voice or model triggers a WebSocket reconnect. Args: - update: A :class:`TTSSettings` (or ``FishAudioTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``FishAudioTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: await self._disconnect() diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 8d968e00f..d0a6f5a84 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -280,7 +280,7 @@ class GladiaSTTService(WebsocketSTTService): self._region = region self._url = url self._receive_task = None - self._settings = GladiaSTTSettings(model=model, input_params=params) + self._settings = GladiaSTTSettings(model=model, language=None, input_params=params) self._sync_model_name_to_metrics() # Session management @@ -379,18 +379,18 @@ class GladiaSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: GladiaSTTSettings) -> dict[str, Any]: - """Apply settings update. 
+ async def _update_settings(self, delta: GladiaSTTSettings) -> dict[str, Any]: + """Apply settings delta. Settings are stored but not applied to the active session. Args: - update: A settings delta. + delta: A settings delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 84b06a86d..037b23bb3 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -750,6 +750,9 @@ class GeminiLiveLLMService(LLMService): temperature=params.temperature, top_k=params.top_k, top_p=params.top_p, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, modalities=params.modalities, language=self._language_code, media_resolution=params.media_resolution, @@ -806,12 +809,12 @@ class GeminiLiveLLMService(LLMService): """ return True - async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: LLMSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 2004aa15a..1c6f56669 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -807,6 +807,11 @@ class GoogleLLMService(LLMService): temperature=params.temperature, top_k=params.top_k, top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, thinking=params.thinking, extra=params.extra if isinstance(params.extra, dict) else {}, ) diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 72d4f12b6..e294be20a 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -554,7 +554,9 @@ class GoogleSTTService(STTService): self._client = speech_v2.SpeechAsyncClient(credentials=creds, client_options=client_options) self._settings = GoogleSTTSettings( + language=None, languages=list(params.language_list), + language_codes=None, model=params.model, use_separate_recognition_per_channel=params.use_separate_recognition_per_channel, enable_automatic_punctuation=params.enable_automatic_punctuation, @@ -597,11 +599,9 @@ class GoogleSTTService(STTService): Returns: List[str]: Google STT language code strings. 
""" - from pipecat.services.settings import is_given - - if is_given(self._settings.languages): + if self._settings.languages: return [self.language_to_service_language(lang) for lang in self._settings.languages] - if is_given(self._settings.language_codes): + if self._settings.language_codes: return list(self._settings.language_codes) return ["en-US"] @@ -632,8 +632,8 @@ class GoogleSTTService(STTService): logger.debug(f"Switching STT languages to: {languages}") await self._update_settings(GoogleSTTSettings(languages=list(languages))) - async def _update_settings(self, update: GoogleSTTSettings) -> dict[str, Any]: - """Apply settings update and reconnect if anything changed. + async def _update_settings(self, delta: GoogleSTTSettings) -> dict[str, Any]: + """Apply settings delta and reconnect if anything changed. Handles ``language`` from base ``set_language`` by converting it to ``languages``. Emits a deprecation warning if ``language_codes`` is @@ -641,7 +641,7 @@ class GoogleSTTService(STTService): Reconnects the stream on any change. Args: - update: A settings delta. + delta: A settings delta. Returns: Dict mapping changed field names to their previous values. 
@@ -649,13 +649,13 @@ class GoogleSTTService(STTService): from pipecat.services.settings import is_given # If base set_language sent a Language value, convert to languages list - if is_given(update.language): - update.languages = [update.language] + if is_given(delta.language): + delta.languages = [delta.language] # Clear language so the base class doesn't try to store it - update.language = NOT_GIVEN + delta.language = NOT_GIVEN # Warn on deprecated language_codes usage - if is_given(update.language_codes): + if is_given(delta.language_codes): with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -665,7 +665,7 @@ class GoogleSTTService(STTService): stacklevel=2, ) - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: await self._reconnect_if_needed() @@ -745,34 +745,34 @@ class GoogleSTTService(STTService): DeprecationWarning, ) # Build a settings delta from the provided options - update = GoogleSTTSettings() + delta = GoogleSTTSettings() if languages is not None: - update.languages = list(languages) + delta.languages = list(languages) if model is not None: - update.model = model + delta.model = model if enable_automatic_punctuation is not None: - update.enable_automatic_punctuation = enable_automatic_punctuation + delta.enable_automatic_punctuation = enable_automatic_punctuation if enable_spoken_punctuation is not None: - update.enable_spoken_punctuation = enable_spoken_punctuation + delta.enable_spoken_punctuation = enable_spoken_punctuation if enable_spoken_emojis is not None: - update.enable_spoken_emojis = enable_spoken_emojis + delta.enable_spoken_emojis = enable_spoken_emojis if profanity_filter is not None: - update.profanity_filter = profanity_filter + delta.profanity_filter = profanity_filter if enable_word_time_offsets is not None: - update.enable_word_time_offsets = enable_word_time_offsets + delta.enable_word_time_offsets = enable_word_time_offsets if 
enable_word_confidence is not None: - update.enable_word_confidence = enable_word_confidence + delta.enable_word_confidence = enable_word_confidence if enable_interim_results is not None: - update.enable_interim_results = enable_interim_results + delta.enable_interim_results = enable_interim_results if enable_voice_activity_events is not None: - update.enable_voice_activity_events = enable_voice_activity_events + delta.enable_voice_activity_events = enable_voice_activity_events if location is not None: logger.debug(f"Updating location to: {location}") self._location = location - await self._update_settings(update) + await self._update_settings(delta) async def _connect(self): """Initialize streaming recognition config and stream.""" diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 1103c69e4..3416ee6d4 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -608,6 +608,7 @@ class GoogleHttpTTSService(TTSService): self._location = location self._settings = GoogleHttpTTSSettings( + model=None, pitch=params.pitch, rate=params.rate, speaking_rate=params.speaking_rate, @@ -688,20 +689,20 @@ class GoogleHttpTTSService(TTSService): """ return language_to_google_tts_language(language) - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: """Override to handle speaking_rate validation. Args: - update: Settings delta. Can include 'speaking_rate' (float). + delta: Settings delta. Can include 'speaking_rate' (float). """ - if isinstance(update, GoogleHttpTTSSettings) and is_given(update.speaking_rate): - rate_value = float(update.speaking_rate) + if isinstance(delta, GoogleHttpTTSSettings) and is_given(delta.speaking_rate): + rate_value = float(delta.speaking_rate) if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. 
Must be between 0.25 and 2.0" ) - update.speaking_rate = NOT_GIVEN - return await super()._update_settings(update) + delta.speaking_rate = NOT_GIVEN + return await super()._update_settings(delta) def _construct_ssml(self, text: str) -> str: ssml = "" @@ -1021,6 +1022,7 @@ class GoogleTTSService(GoogleBaseTTSService): self._location = location self._settings = GoogleStreamTTSSettings( + model=None, language=self.language_to_service_language(params.language) if params.language else "en-US", @@ -1032,20 +1034,20 @@ class GoogleTTSService(GoogleBaseTTSService): credentials, credentials_path ) - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: """Override to handle speaking_rate validation. Args: - update: Settings delta. Can include 'speaking_rate' (float). + delta: Settings delta. Can include 'speaking_rate' (float). """ - if isinstance(update, GoogleStreamTTSSettings) and is_given(update.speaking_rate): - rate_value = float(update.speaking_rate) + if isinstance(delta, GoogleStreamTTSSettings) and is_given(delta.speaking_rate): + rate_value = float(delta.speaking_rate) if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. Must be between 0.25 and 2.0" ) - update.speaking_rate = NOT_GIVEN - return await super()._update_settings(update) + delta.speaking_rate = NOT_GIVEN + return await super()._update_settings(delta) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1230,6 +1232,7 @@ class GeminiTTSService(GoogleBaseTTSService): self._location = location self._model = model self._settings = GeminiTTSSettings( + model=None, language=self.language_to_service_language(params.language) if params.language else "en-US", @@ -1267,19 +1270,19 @@ class GeminiTTSService(GoogleBaseTTSService): f"Current rate of {self.sample_rate}Hz may cause issues." 
) - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update with voice validation. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta with voice validation. Args: - update: Settings delta. Can include 'voice', 'prompt', etc. + delta: Settings delta. Can include 'voice', 'prompt', etc. Returns: Dict mapping changed field names to their previous values. """ - if is_given(update.voice) and update.voice not in self.AVAILABLE_VOICES: - logger.warning(f"Voice '{update.voice}' not in known voices list. Using anyway.") + if is_given(delta.voice) and delta.voice not in self.AVAILABLE_VOICES: + logger.warning(f"Voice '{delta.voice}' not in known voices list. Using anyway.") - return await super()._update_settings(update) + return await super()._update_settings(delta) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 1583fac3c..4ec7bf6ff 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -148,8 +148,9 @@ class GradiumSTTService(WebsocketSTTService): params = params or GradiumSTTService.InputParams() self._settings = GradiumSTTSettings( + model=None, language=params.language, - delay_in_frames=params.delay_in_frames if params.delay_in_frames else NOT_GIVEN, + delay_in_frames=params.delay_in_frames or None, ) 
self._receive_task = None @@ -171,16 +172,16 @@ class GradiumSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, sync params, and reconnect. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, sync params, and reconnect. Args: - update: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed @@ -326,11 +327,11 @@ class GradiumSTTService(WebsocketSTTService): json_config = {} if self._json_config: json_config = json.loads(self._json_config) - if is_given(self._settings.language) and self._settings.language: + if self._settings.language: gradium_language = language_to_gradium_language(self._settings.language) if gradium_language: json_config["language"] = gradium_language - if is_given(self._settings.delay_in_frames) and self._settings.delay_in_frames: + if self._settings.delay_in_frames: json_config["delay_in_frames"] = self._settings.delay_in_frames if json_config: setup_msg["json_config"] = json_config diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index d10f6258d..703289706 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -105,6 +105,7 @@ class GradiumTTSService(AudioContextTTSService): self._settings = GradiumTTSSettings( model=model, voice=voice_id, + language=None, output_format="pcm", ) @@ -119,16 +120,16 @@ class GradiumTTSService(AudioContextTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if voice changed. 
+ async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if voice changed. Args: - update: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if "voice" in changed: await self._disconnect() await self._connect() diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index 14c93c94a..4f6a62e24 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -151,6 +151,16 @@ class GrokRealtimeLLMService(LLMService): self.base_url = base_url self._settings = GrokRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, session_properties=session_properties or events.SessionProperties(), ) @@ -358,9 +368,9 @@ class GrokRealtimeLLMService(LLMService): """ # Backward-compatible dict path: frame.settings contains SessionProperties # fields, not our Settings fields, so we construct SessionProperties - # directly. The frame.update path falls through to super, which calls + # directly. The frame.delta path falls through to super, which calls # _update_settings → our override handles the rest. 
- if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: self._settings.session_properties = events.SessionProperties(**frame.settings) await self._send_session_update() await self.push_frame(frame, direction) @@ -463,13 +473,13 @@ class GrokRealtimeLLMService(LLMService): return await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self, update): - """Apply a settings update, sending a session update if needed.""" + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" # Capture current sample rates before the update replaces them. input_rate = self._get_configured_sample_rate("input") output_rate = self._get_configured_sample_rate("output") - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if "session_properties" in changed: if input_rate and output_rate: diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index b5a064334..4b13226cc 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -137,6 +137,7 @@ class HumeTTSService(TTSService): params = params or HumeTTSService.InputParams() self._settings = HumeTTSSettings( + model=None, voice=voice_id, description=params.description, speed=params.speed, @@ -210,7 +211,7 @@ class HumeTTSService(TTSService): """Runtime updates via key/value pair. .. deprecated:: 0.0.103 - Use ``TTSUpdateSettingsFrame(update=HumeTTSSettings(...))`` instead. + Use ``TTSUpdateSettingsFrame(delta=HumeTTSSettings(...))`` instead. Args: key: The name of the setting to update. 
Recognized keys are: @@ -224,7 +225,7 @@ class HumeTTSService(TTSService): warnings.simplefilter("always") warnings.warn( "'update_setting' is deprecated, use " - "'TTSUpdateSettingsFrame(update=HumeTTSSettings(...))' instead.", + "'TTSUpdateSettingsFrame(delta=HumeTTSSettings(...))' instead.", DeprecationWarning, stacklevel=2, ) diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 8e457aabc..2f35dc27c 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -29,7 +29,7 @@ from pipecat import version as pipecat_version USER_AGENT = f"pipecat/{pipecat_version()}" from pydantic import BaseModel -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven try: from websockets.asyncio.client import connect as websocket_connect @@ -173,17 +173,16 @@ class InworldHttpTTSService(TTSService): self._settings = InworldTTSSettings( model=model, voice=voice_id, + language=None, audio_encoding=encoding, audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=None, # Not applicable for HTTP TTS + apply_text_normalization=None, # Not applicable for HTTP TTS ) - if params.temperature is not None: - self._settings.temperature = params.temperature - if params.speaking_rate is not None: - self._settings.speaking_rate = params.speaking_rate - if params.timestamp_transport_strategy is not None: - self._settings.timestamp_transport_strategy = params.timestamp_transport_strategy - self._cumulative_time = 0.0 self._sync_model_name_to_metrics() @@ -286,7 +285,7 @@ class InworldHttpTTSService(TTSService): "audioEncoding": self._settings.audio_encoding, "sampleRateHertz": self._settings.audio_sample_rate, } - if is_given(self._settings.speaking_rate): + if self._settings.speaking_rate is not None: 
audio_config["speakingRate"] = self._settings.speaking_rate payload = { @@ -296,12 +295,12 @@ class InworldHttpTTSService(TTSService): "audioConfig": audio_config, } - if is_given(self._settings.temperature): + if self._settings.temperature is not None: payload["temperature"] = self._settings.temperature # Use WORD timestamps for simplicity and correct spacing/capitalization payload["timestampType"] = self._timestamp_type - if is_given(self._settings.timestamp_transport_strategy): + if self._settings.timestamp_transport_strategy is not None: payload["timestampTransportStrategy"] = self._settings.timestamp_transport_strategy request_id = str(uuid.uuid4()) @@ -549,25 +548,17 @@ class InworldTTSService(AudioContextTTSService): self._settings = InworldTTSSettings( model=model, voice=voice_id, + language=None, audio_encoding=encoding, audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + apply_text_normalization=params.apply_text_normalization, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=params.auto_mode if params.auto_mode is not None else aggregate_sentences, ) self._timestamp_type = "WORD" - if params.temperature is not None: - self._settings.temperature = params.temperature - if params.speaking_rate is not None: - self._settings.speaking_rate = params.speaking_rate - if params.apply_text_normalization is not None: - self._settings.apply_text_normalization = params.apply_text_normalization - if params.timestamp_transport_strategy is not None: - self._settings.timestamp_transport_strategy = params.timestamp_transport_strategy - - if params.auto_mode is not None: - self._settings.auto_mode = params.auto_mode - else: - self._settings.auto_mode = aggregate_sentences - self._buffer_settings = { "maxBufferDelayMs": params.max_buffer_delay_ms, "bufferCharThreshold": params.buffer_char_threshold, @@ -757,12 +748,12 @@ class InworldTTSService(AudioContextTTSService): await 
self._disconnect_websocket() - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed @@ -959,7 +950,7 @@ class InworldTTSService(AudioContextTTSService): "audioEncoding": self._settings.audio_encoding, "sampleRateHertz": self._settings.audio_sample_rate, } - if is_given(self._settings.speaking_rate): + if self._settings.speaking_rate is not None: audio_config["speakingRate"] = self._settings.speaking_rate create_config: Dict[str, Any] = { @@ -968,13 +959,13 @@ class InworldTTSService(AudioContextTTSService): "audioConfig": audio_config, } - if is_given(self._settings.temperature): + if self._settings.temperature is not None: create_config["temperature"] = self._settings.temperature - if is_given(self._settings.apply_text_normalization): + if self._settings.apply_text_normalization is not None: create_config["applyTextNormalization"] = self._settings.apply_text_normalization - if is_given(self._settings.auto_mode): + if self._settings.auto_mode is not None: create_config["autoMode"] = self._settings.auto_mode - if is_given(self._settings.timestamp_transport_strategy): + if self._settings.timestamp_transport_strategy is not None: create_config["timestampTransportStrategy"] = ( self._settings.timestamp_transport_strategy ) diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 6e848ae87..519c565ba 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -144,6 +144,7 @@ class KokoroTTSService(TTSService): self._lang_code = language_to_kokoro_language(params.language) self._settings = KokoroTTSSettings( + model=None, voice=voice_id, 
language=language_to_kokoro_language(params.language), lang_code=language_to_kokoro_language(params.language), diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index e4f1adb45..df1bc6d08 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -59,7 +59,7 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService -from pipecat.services.settings import LLMSettings, is_given +from pipecat.services.settings import LLMSettings from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationUtil, @@ -312,19 +312,21 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._cancel_sequential_runner_task() await self._cancel_summary_task() - async def _update_settings(self, update: LLMSettings) -> dict[str, Any]: - """Apply a settings update, handling turn-completion fields. + async def _update_settings(self, delta: LLMSettings) -> dict[str, Any]: + """Apply a settings delta, handling turn-completion fields. Args: - update: An LLM settings delta. + delta: An LLM settings delta. Returns: Dict mapping changed field names to their previous values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if "filter_incomplete_user_turns" in changed: - self._filter_incomplete_user_turns = self._settings.filter_incomplete_user_turns + self._filter_incomplete_user_turns = ( + self._settings.filter_incomplete_user_turns or False + ) logger.info( f"{self}: Incomplete turn filtering " f"{'enabled' if self._filter_incomplete_user_turns else 'disabled'}" @@ -349,20 +351,20 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): elif isinstance(frame, LLMConfigureOutputFrame): self._skip_tts = frame.skip_tts elif isinstance(frame, LLMUpdateSettingsFrame): - if frame.update is not None: - await self._update_settings(frame.update) + if frame.delta is not None: + await self._update_settings(frame.delta) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( "Passing a dict via LLMUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use LLMUpdateSettingsFrame(update=LLMSettings(...)) instead.", + "since 0.0.103, use LLMUpdateSettingsFrame(delta=LLMSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) - update = type(self._settings).from_mapping(frame.settings) - await self._update_settings(update) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, LLMContextSummaryRequestFrame): await self._handle_summary_request(frame) diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index f70bfa402..b7ebb19ea 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -206,16 +206,16 @@ class LmntTTSService(InterruptibleTTSService): await self._disconnect_websocket() - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. 
+ async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. Args: - update: A :class:`TTSSettings` (or ``LmntTTSSettings``) delta. + delta: A :class:`TTSSettings` (or ``LmntTTSSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: await self._disconnect() diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 54925f7e4..388b7ee9e 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -240,13 +240,19 @@ class MiniMaxHttpTTSService(TTSService): self._settings = MiniMaxTTSSettings( model=model, voice=voice_id, + language=None, stream=True, speed=params.speed, volume=params.volume, pitch=params.pitch, + language_boost=None, + emotion=None, + text_normalization=None, + latex_read=None, audio_bitrate=128000, audio_format="pcm", audio_channel=1, + audio_sample_rate=0, ) self._sync_model_name_to_metrics() @@ -351,11 +357,11 @@ class MiniMaxHttpTTSService(TTSService): "vol": self._settings.volume, "pitch": self._settings.pitch, } - if is_given(self._settings.emotion): + if self._settings.emotion is not None: voice_setting["emotion"] = self._settings.emotion - if is_given(self._settings.text_normalization): + if self._settings.text_normalization is not None: voice_setting["text_normalization"] = self._settings.text_normalization - if is_given(self._settings.latex_read): + if 
self._settings.latex_read is not None: voice_setting["latex_read"] = self._settings.latex_read # Build audio_setting dict for API @@ -374,7 +380,7 @@ class MiniMaxHttpTTSService(TTSService): "model": self._settings.model, "text": text, } - if is_given(self._settings.language_boost): + if self._settings.language_boost is not None: payload["language_boost"] = self._settings.language_boost try: diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index dd2360e4c..e076958c4 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -147,6 +147,7 @@ class NeuphonicTTSService(InterruptibleTTSService): self._api_key = api_key self._url = url self._settings = NeuphonicTTSSettings( + model=None, language=self.language_to_service_language(params.language), speed=params.speed, encoding=encoding, @@ -179,9 +180,9 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return language_to_neuphonic_lang_code(language) - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect with new configuration.""" - changed = await super()._update_settings(update) + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect with new configuration.""" + changed = await super()._update_settings(delta) if changed: await self._disconnect() await self._connect() @@ -450,6 +451,7 @@ class NeuphonicHttpTTSService(TTSService): self._session = aiohttp_session self._base_url = url.rstrip("/") self._settings = NeuphonicTTSSettings( + model=None, voice=voice_id, language=self.language_to_service_language(params.language) or "en", speed=params.speed, diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index fd924204e..be9002b14 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -579,16 +579,16 @@ class 
NvidiaSegmentedSTTService(SegmentedSTTService): self._config = self._create_recognition_config() logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self._settings.model}") - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update and sync internal state. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and sync internal state. Args: - update: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: self._config = self._create_recognition_config() diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index ade5da63d..12bcf8c21 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -150,12 +150,12 @@ class NvidiaTTSService(TTSService): stacklevel=2, ) - async def _update_settings(self, update: NvidiaTTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: NvidiaTTSSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed # TODO: reconnect gRPC client to apply changed settings. 
diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 960ebd6f7..9ba0583a1 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -144,9 +144,12 @@ class BaseOpenAILLMService(LLMService): seed=params.seed, temperature=params.temperature, top_p=params.top_p, + top_k=None, max_tokens=params.max_tokens, max_completion_tokens=params.max_completion_tokens, service_tier=params.service_tier, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, extra=params.extra if isinstance(params.extra, dict) else {}, ) self._retry_timeout_secs = retry_timeout_secs diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index d765fea75..39c0290fc 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -178,6 +178,15 @@ class OpenAIRealtimeLLMService(LLMService): self._settings = OpenAIRealtimeLLMSettings( model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, session_properties=session_properties or events.SessionProperties(), ) self._sync_model_name_to_metrics() @@ -415,9 +424,9 @@ class OpenAIRealtimeLLMService(LLMService): """ # Backward-compatible dict path: frame.settings contains SessionProperties # fields, not our Settings fields, so we construct SessionProperties - # directly. The frame.update path falls through to super, which calls + # directly. The frame.delta path falls through to super, which calls # _update_settings → our override handles the rest. 
- if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: self._settings.session_properties = events.SessionProperties(**frame.settings) await self._send_session_update() await self.push_frame(frame, direction) @@ -536,9 +545,9 @@ class OpenAIRealtimeLLMService(LLMService): # treat a send-side error as fatal. await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self, update): - """Apply a settings update, sending a session update if needed.""" - changed = await super()._update_settings(update) + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" + changed = await super()._update_settings(delta) if "session_properties" in changed: await self._send_session_update() self._warn_unhandled_updated_settings(changed.keys() - {"session_properties"}) diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 13a37a2b1..8b690d015 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -35,7 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription @@ -268,19 +268,19 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """ return True - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update and send session update if needed. 
+ async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and send session update if needed. Keeps ``_language_code`` and ``_prompt`` in sync with settings and sends a ``session.update`` to the server when the session is active. Args: - update: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index efac34223..6ffccfbef 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -163,6 +163,15 @@ class OpenAIRealtimeBetaLLMService(LLMService): self._settings = OpenAIRealtimeBetaLLMSettings( model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, session_properties=session_properties or events.SessionProperties(), ) self._sync_model_name_to_metrics() @@ -361,9 +370,9 @@ class OpenAIRealtimeBetaLLMService(LLMService): """ # Backward-compatible dict path: frame.settings contains SessionProperties # fields, not our Settings fields, so we construct SessionProperties - # directly. The frame.update path falls through to super, which calls + # directly. The frame.delta path falls through to super, which calls # _update_settings → our override handles the rest. 
- if isinstance(frame, LLMUpdateSettingsFrame) and frame.update is None: + if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: self._settings.session_properties = events.SessionProperties(**frame.settings) await self._send_session_update() await self.push_frame(frame, direction) @@ -480,9 +489,9 @@ class OpenAIRealtimeBetaLLMService(LLMService): # treat a send-side error as fatal. await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self, update): - """Apply a settings update, sending a session update if needed.""" - changed = await super()._update_settings(update) + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" + changed = await super()._update_settings(delta) if "session_properties" in changed: await self._send_session_update() return changed diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index 04f25621d..e03bace8d 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -11,8 +11,6 @@ an OpenAI-compatible interface. It handles Perplexity's unique token usage reporting patterns while maintaining compatibility with the Pipecat framework. 
""" -from openai import NOT_GIVEN - from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -72,15 +70,15 @@ class PerplexityLLMService(OpenAILLMService): } # Add OpenAI-compatible parameters if they're set - if self._settings.frequency_penalty is not NOT_GIVEN: + if self._settings.frequency_penalty is not None: params["frequency_penalty"] = self._settings.frequency_penalty - if self._settings.presence_penalty is not NOT_GIVEN: + if self._settings.presence_penalty is not None: params["presence_penalty"] = self._settings.presence_penalty - if self._settings.temperature is not NOT_GIVEN: + if self._settings.temperature is not None: params["temperature"] = self._settings.temperature - if self._settings.top_p is not NOT_GIVEN: + if self._settings.top_p is not None: params["top_p"] = self._settings.top_p - if self._settings.max_tokens is not NOT_GIVEN: + if self._settings.max_tokens is not None: params["max_tokens"] = self._settings.max_tokens return params diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index 0b43d96d2..e6a2c6943 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -71,7 +71,7 @@ class PiperTTSService(TTSService): """ super().__init__(**kwargs) - self._settings = PiperTTSSettings(voice=voice_id) + self._settings = PiperTTSSettings(model=None, voice=voice_id, language=None) download_dir = download_dir or Path.cwd() @@ -96,12 +96,12 @@ class PiperTTSService(TTSService): """ return True - async def _update_settings(self, update: PiperTTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: PiperTTSSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed # TODO: voice changes would require re-downloading and loading the model. @@ -207,7 +207,7 @@ class PiperHttpTTSService(TTSService): self._base_url = base_url self._session = aiohttp_session - self._settings = PiperHttpTTSSettings(voice=voice_id) + self._settings = PiperHttpTTSSettings(model=None, voice=voice_id, language=None) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index ff34725f9..08a87209c 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -34,7 +34,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -204,6 +204,7 @@ class PlayHTTTSService(InterruptibleTTSService): voice_engine=voice_engine, speed=params.speed, seed=params.seed, + playht_sample_rate=0, ) self._sync_model_name_to_metrics() @@ -215,12 +216,12 @@ class PlayHTTTSService(InterruptibleTTSService): """ return True - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. Settings are stored but not applied to the active connection. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed @@ -569,6 +570,7 @@ class PlayHTHttpTTSService(TTSService): voice_engine=voice_engine, speed=params.speed, seed=params.seed, + playht_sample_rate=0, ) self._sync_model_name_to_metrics() diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 177a4c10e..b1b8d1de8 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -100,6 +100,7 @@ class ResembleAITTSService(AudioContextTTSService): self._api_key = api_key self._url = url self._settings = ResembleAITTSSettings( + model=None, voice=voice_id, precision=precision, output_format=output_format, diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 6e795cc3d..6c3ff4e23 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -31,7 +31,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( AudioContextTTSService, InterruptibleTTSService, @@ -233,27 +233,19 @@ class RimeTTSService(AudioContextTTSService): samplingRate=0, # updated in start() language=self.language_to_service_language(params.language) if params.language - else NOT_GIVEN, - segment=params.segment if params.segment is not None else NOT_GIVEN, + else None, + segment=params.segment, # Arcana params - repetition_penalty=params.repetition_penalty - if params.repetition_penalty is not None - else NOT_GIVEN, - temperature=params.temperature if params.temperature is not None else NOT_GIVEN, - top_p=params.top_p if params.top_p is not None else NOT_GIVEN, + repetition_penalty=params.repetition_penalty, + 
temperature=params.temperature, + top_p=params.top_p, # Mistv2 params - speedAlpha=params.speed_alpha if params.speed_alpha is not None else NOT_GIVEN, - reduceLatency=params.reduce_latency if params.reduce_latency is not None else NOT_GIVEN, - pauseBetweenBrackets=params.pause_between_brackets - if params.pause_between_brackets is not None - else NOT_GIVEN, - phonemizeBetweenBrackets=params.phonemize_between_brackets - if params.phonemize_between_brackets is not None - else NOT_GIVEN, - noTextNormalization=params.no_text_normalization - if params.no_text_normalization is not None - else NOT_GIVEN, - saveOovs=params.save_oovs if params.save_oovs is not None else NOT_GIVEN, + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + noTextNormalization=params.no_text_normalization, + saveOovs=params.save_oovs, ) self._sync_model_name_to_metrics() @@ -295,32 +287,32 @@ class RimeTTSService(AudioContextTTSService): "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, } - if is_given(self._settings.language): + if self._settings.language is not None: params["lang"] = self._settings.language - if is_given(self._settings.segment): + if self._settings.segment is not None: params["segment"] = self._settings.segment if self._settings.model == "arcana": - if is_given(self._settings.repetition_penalty): + if self._settings.repetition_penalty is not None: params["repetition_penalty"] = self._settings.repetition_penalty - if is_given(self._settings.temperature): + if self._settings.temperature is not None: params["temperature"] = self._settings.temperature - if is_given(self._settings.top_p): + if self._settings.top_p is not None: params["top_p"] = self._settings.top_p else: # mistv2/mist - if is_given(self._settings.speedAlpha): + if self._settings.speedAlpha is not None: params["speedAlpha"] = 
self._settings.speedAlpha - if is_given(self._settings.reduceLatency): + if self._settings.reduceLatency is not None: params["reduceLatency"] = self._settings.reduceLatency - if is_given(self._settings.pauseBetweenBrackets): + if self._settings.pauseBetweenBrackets is not None: params["pauseBetweenBrackets"] = json.dumps(self._settings.pauseBetweenBrackets) - if is_given(self._settings.phonemizeBetweenBrackets): + if self._settings.phonemizeBetweenBrackets is not None: params["phonemizeBetweenBrackets"] = json.dumps( self._settings.phonemizeBetweenBrackets ) - if is_given(self._settings.noTextNormalization): + if self._settings.noTextNormalization is not None: params["noTextNormalization"] = json.dumps(self._settings.noTextNormalization) - if is_given(self._settings.saveOovs): + if self._settings.saveOovs is not None: params["saveOovs"] = json.dumps(self._settings.saveOovs) return params @@ -350,13 +342,13 @@ class RimeTTSService(AudioContextTTSService): self._extra_msg_fields["inlineSpeedAlpha"] = ",".join(speed_vals + [str(speed)]) return f"[{text}]" - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if necessary. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed and self._websocket: await self._disconnect() @@ -665,11 +657,19 @@ class RimeHttpTTSService(TTSService): language=self.language_to_service_language(params.language) if params.language else "eng", + audioFormat="pcm", + samplingRate=0, + segment=None, speedAlpha=params.speed_alpha, reduceLatency=params.reduce_latency, pauseBetweenBrackets=params.pause_between_brackets, phonemizeBetweenBrackets=params.phonemize_between_brackets, - inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else NOT_GIVEN, + noTextNormalization=None, + saveOovs=None, + inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else None, + repetition_penalty=None, + temperature=None, + top_p=None, voice=voice_id, ) self._sync_model_name_to_metrics() @@ -719,7 +719,7 @@ class RimeHttpTTSService(TTSService): "pauseBetweenBrackets": self._settings.pauseBetweenBrackets, "phonemizeBetweenBrackets": self._settings.phonemizeBetweenBrackets, } - if is_given(self._settings.inlineSpeedAlpha): + if self._settings.inlineSpeedAlpha is not None: payload["inlineSpeedAlpha"] = self._settings.inlineSpeedAlpha payload["text"] = text payload["speaker"] = self._settings.voice @@ -846,13 +846,11 @@ class RimeNonJsonTTSService(InterruptibleTTSService): samplingRate=sample_rate, language=self.language_to_service_language(params.language) if params.language - else NOT_GIVEN, - segment=params.segment if params.segment is not None else NOT_GIVEN, - repetition_penalty=params.repetition_penalty - if params.repetition_penalty is not None - else NOT_GIVEN, - temperature=params.temperature if params.temperature is not None else NOT_GIVEN, - top_p=params.top_p if params.top_p is not None else NOT_GIVEN, + else None, + segment=params.segment, + repetition_penalty=params.repetition_penalty, + temperature=params.temperature, + top_p=params.top_p, ) 
self._sync_model_name_to_metrics() # Add any extra parameters for future compatibility @@ -940,15 +938,15 @@ class RimeNonJsonTTSService(InterruptibleTTSService): "audioFormat": self._settings.audioFormat, "samplingRate": self._settings.samplingRate, } - if is_given(self._settings.language): + if self._settings.language is not None: settings_dict["lang"] = self._settings.language - if is_given(self._settings.segment): + if self._settings.segment is not None: settings_dict["segment"] = self._settings.segment - if is_given(self._settings.repetition_penalty): + if self._settings.repetition_penalty is not None: settings_dict["repetition_penalty"] = self._settings.repetition_penalty - if is_given(self._settings.temperature): + if self._settings.temperature is not None: settings_dict["temperature"] = self._settings.temperature - if is_given(self._settings.top_p): + if self._settings.top_p is not None: settings_dict["top_p"] = self._settings.top_p # Include extras settings_dict.update(self._settings.extra) @@ -1046,13 +1044,13 @@ class RimeNonJsonTTSService(InterruptibleTTSService): except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and reconnect if necessary. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if changed: logger.debug("Settings changed, reconnecting WebSocket with new parameters") diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 0128c1a22..02d6e250f 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -274,8 +274,8 @@ class SarvamSTTService(STTService): self._settings = SarvamSTTSettings( model=model, language=params.language, - prompt=params.prompt if params.prompt is not None else NOT_GIVEN, - mode=mode if mode is not None else NOT_GIVEN, + prompt=params.prompt, + mode=mode, vad_signals=params.vad_signals, high_vad_sensitivity=params.high_vad_sensitivity, ) @@ -329,11 +329,11 @@ class SarvamSTTService(STTService): if self._socket_client: await self._socket_client.flush() - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, validate, sync state, and reconnect. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, validate, sync state, and reconnect. Args: - update: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. + delta: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. Returns: Dict mapping changed field names to their previous values. @@ -342,26 +342,26 @@ class SarvamSTTService(STTService): ValueError: If a setting is not supported by the current model. """ # Validate against model capabilities before applying - if is_given(update.language) and update.language is not None: + if is_given(delta.language) and delta.language is not None: if not self._config.supports_language: raise ValueError( f"Model '{self._settings.model}' does not support language parameter " "(auto-detects language)." 
) - if isinstance(update, SarvamSTTSettings): - if is_given(update.prompt) and update.prompt is not None: + if isinstance(delta, SarvamSTTSettings): + if is_given(delta.prompt) and delta.prompt is not None: if not self._config.supports_prompt: raise ValueError( f"Model '{self._settings.model}' does not support prompt parameter." ) - if is_given(update.mode) and update.mode is not None: + if is_given(delta.mode) and delta.mode is not None: if not self._config.supports_mode: raise ValueError( f"Model '{self._settings.model}' does not support mode parameter." ) - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) # TODO: someday we could reconnect here to apply updated settings. # Code might look something like the below: @@ -510,16 +510,12 @@ class SarvamSTTService(STTService): connect_kwargs["language_code"] = language_string # Add mode for models that support it - if self._config.supports_mode and is_given(self._settings.mode): + if self._config.supports_mode and self._settings.mode is not None: connect_kwargs["mode"] = self._settings.mode # Prompt support differs across sarvamai versions. Prefer connect-time prompt # when available and gracefully degrade if the SDK doesn't accept it. - if ( - is_given(self._settings.prompt) - and self._settings.prompt is not None - and self._config.supports_prompt - ): + if self._settings.prompt is not None and self._config.supports_prompt: connect_kwargs["prompt"] = self._settings.prompt def _connect_with_sdk_headers(connect_fn, **kwargs): @@ -561,11 +557,7 @@ class SarvamSTTService(STTService): self._socket_client = await self._websocket_context.__aenter__() # Fallback for SDKs that support runtime prompt updates. 
- if ( - is_given(self._settings.prompt) - and self._settings.prompt is not None - and self._config.supports_prompt - ): + if self._settings.prompt is not None and self._config.supports_prompt: prompt_setter = getattr(self._socket_client, "set_prompt", None) if callable(prompt_setter): await prompt_setter(self._settings.prompt) diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 191689f5a..45c283ff1 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -62,7 +62,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -486,6 +486,9 @@ class SarvamHttpTTSService(TTSService): True if self._config.preprocessing_always_enabled else params.enable_preprocessing ), pace=pace, + pitch=None, + loudness=None, + temperature=None, model=model, voice=voice_id, ) @@ -559,19 +562,19 @@ class SarvamHttpTTSService(TTSService): "sample_rate": self.sample_rate, "enable_preprocessing": self._settings.enable_preprocessing, "model": self._settings.model, - "pace": self._settings.pace if is_given(self._settings.pace) else 1.0, + "pace": self._settings.pace if self._settings.pace is not None else 1.0, } # Add model-specific parameters based on config if self._config.supports_pitch: - payload["pitch"] = self._settings.pitch if is_given(self._settings.pitch) else 0.0 + payload["pitch"] = self._settings.pitch if self._settings.pitch is not None else 0.0 if self._config.supports_loudness: payload["loudness"] = ( - self._settings.loudness if 
is_given(self._settings.loudness) else 1.0 + self._settings.loudness if self._settings.loudness is not None else 1.0 ) if self._config.supports_temperature: payload["temperature"] = ( - self._settings.temperature if is_given(self._settings.temperature) else 0.6 + self._settings.temperature if self._settings.temperature is not None else 0.6 ) headers = { @@ -849,6 +852,9 @@ class SarvamTTSService(InterruptibleTTSService): output_audio_codec=params.output_audio_codec, output_audio_bitrate=params.output_audio_bitrate, pace=pace, + pitch=None, + loudness=None, + temperature=None, model=model, voice=voice_id, ) @@ -949,9 +955,9 @@ class SarvamTTSService(InterruptibleTTSService): if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await self.flush_audio() - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a settings update and resend config if voice changed.""" - changed = await super()._update_settings(update) + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and resend config if voice changed.""" + changed = await super()._update_settings(delta) if changed: await self._send_config() @@ -1027,11 +1033,11 @@ class SarvamTTSService(InterruptibleTTSService): "pace": self._settings.pace, "model": self._settings.model, } - if is_given(self._settings.pitch): + if self._settings.pitch is not None: config_data["pitch"] = self._settings.pitch - if is_given(self._settings.loudness): + if self._settings.loudness is not None: config_data["loudness"] = self._settings.loudness - if is_given(self._settings.temperature): + if self._settings.temperature is not None: config_data["temperature"] = self._settings.temperature logger.debug(f"Config being sent is {config_data}") config_message = {"type": "config", "data": config_data} diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 4664ecd39..7de476c64 100644 --- a/src/pipecat/services/settings.py +++ 
b/src/pipecat/services/settings.py @@ -6,24 +6,32 @@ """Settings infrastructure for Pipecat AI services. -This module provides dataclass-based settings objects for service configuration. -Each service type has a corresponding settings class (e.g. ``TTSSettings``, -``LLMSettings``) whose fields use the ``NOT_GIVEN`` sentinel to distinguish -"leave unchanged" from an explicit ``None``. +Each service type has a settings dataclass (``LLMSettings``, ``TTSSettings``, +``STTSettings``, or a service-specific subclass). The same class is used in +two distinct modes: -Key concepts: +**Store mode** — the service's ``self._settings`` object that holds the full +current state. Every field must have a real value; ``NOT_GIVEN`` is never +valid here. Services that don't support an inherited field should set it to +``None``. ``validate_complete()`` (called automatically in +``AIService.start()``) enforces this invariant. -- **NOT_GIVEN sentinel**: A value meaning "this field was not provided in the - update". Distinct from ``None`` (which may be a valid value for a setting). -- **Settings as both state and delta**: The same class is used for the - service's current settings *and* for update objects. Fields set to - ``NOT_GIVEN`` are simply skipped when applying an update. -- **apply_update**: Applies a delta onto a target settings object and returns - a dict mapping each changed field name to its previous value. -- **from_mapping**: Constructs a settings object from a plain dict, - supporting field aliases (e.g. ``"voice_id"`` → ``"voice"``). -- **Extras**: Unknown keys land in the ``extra`` dict so services that have - non-standard settings don't lose data. +**Delta mode** — a sparse update object carried by an +``*UpdateSettingsFrame``. Only the fields the caller wants to change are set; +all others remain at their default of ``NOT_GIVEN``. ``apply_update()`` +merges a delta into a store, skipping any ``NOT_GIVEN`` fields. 
+ +Key helpers: + +- ``NOT_GIVEN`` / ``is_given()`` — sentinel and check for "field not provided + in this delta". +- ``apply_update(delta)`` — merge a delta into a store, returning changed + fields. +- ``from_mapping(dict)`` — build a delta from a plain dict (for backward + compatibility with dict-based ``*UpdateSettingsFrame``). +- ``validate_complete()`` — assert that a store has no ``NOT_GIVEN`` fields. +- ``extra`` dict — overflow for service-specific keys that don't map to a + declared field. """ from __future__ import annotations @@ -45,12 +53,15 @@ if TYPE_CHECKING: class _NotGiven: - """Sentinel indicating a settings field was not provided. + """Sentinel meaning "this field was not included in the delta". - ``NOT_GIVEN`` means "the caller did not supply this value" — distinct from - ``None``, which may be a legitimate setting value. It is used as the - default for every settings field so that ``apply_update`` can tell which - fields the caller actually wants to change. + ``NOT_GIVEN`` is distinct from ``None`` (which is a valid stored value, + typically meaning "this service doesn't support this field"). Every + settings field defaults to ``NOT_GIVEN`` so that delta-mode objects are + sparse by default and ``apply_update`` can skip untouched fields. + + ``NOT_GIVEN`` must never appear in a store-mode object — see + ``validate_complete()``. """ _instance: Optional[_NotGiven] = None @@ -68,11 +79,25 @@ class _NotGiven: NOT_GIVEN: _NotGiven = _NotGiven() -"""Singleton sentinel meaning "this field was not included in the update".""" +"""Singleton sentinel meaning "this field was not included in the delta". + +Valid only in delta-mode settings objects. Must never appear in a service's +``self._settings`` (store mode) — use ``None`` instead for unsupported fields. +""" def is_given(value: Any) -> bool: - """Check whether a value was explicitly provided (i.e. is not ``NOT_GIVEN``). + """Check whether a delta field was explicitly provided. 
+ + Typically used when processing a delta to decide whether a field + should be applied:: + + if is_given(delta.voice): + # caller wants to change the voice + ... + + For store-mode objects this is expected to return ``True`` (since + ``validate_complete`` reports any ``NOT_GIVEN`` fields that remain). Args: value: The value to check. @@ -94,28 +119,38 @@ _S = TypeVar("_S", bound="ServiceSettings") class ServiceSettings: """Base class for runtime-updatable service settings. - These settings represent the subset of a service's configuration that can + These settings capture the subset of a service's configuration that can be changed **while the pipeline is running** (e.g. switching the model or changing the voice). They are *not* meant to capture every constructor parameter — only those that support live updates via ``*UpdateSettingsFrame``. Every AI service type (LLM, TTS, STT) extends this with its own fields. - Fields default to ``NOT_GIVEN`` so that an instance can represent either - the full current state **or** a sparse update delta. Note that in the full - current state, **all fields will be given** (i.e. ``NOT_GIVEN`` is reserved - for update deltas). + Each instance operates in one of two modes (see module docstring): + + - **Store mode** (``self._settings``): holds the full current state. + Every field must be a real value — ``NOT_GIVEN`` is never valid. + Use ``None`` for inherited fields the service doesn't support. + Enforced at runtime by ``validate_complete()``. + - **Delta mode** (``*UpdateSettingsFrame``): a sparse update. + Only fields the caller wants to change are set; all others stay at + the default ``NOT_GIVEN`` and are skipped by ``apply_update()``. Parameters: - model: The model identifier used by the service. + model: The model identifier used by the service. Set to ``None`` + in store mode if the service has no model concept. extra: Overflow dict for service-specific keys that don't map to a declared field. 
""" # -- common fields ------------------------------------------------------- - model: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - """AI model identifier (e.g. ``"gpt-4o"``, ``"eleven_turbo_v2_5"``).""" + model: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + """AI model identifier (e.g. ``"gpt-4o"``, ``"eleven_turbo_v2_5"``). + + Defaults to ``NOT_GIVEN`` for delta mode. In store mode, set to a + model string or ``None`` if the service has no model concept. + """ extra: Dict[str, Any] = field(default_factory=dict) """Catch-all for service-specific keys that have no declared field.""" @@ -132,10 +167,14 @@ class ServiceSettings: # -- public API ---------------------------------------------------------- def given_fields(self) -> Dict[str, Any]: - """Return a dict of only the fields that were explicitly provided. + """Return a dict of only the fields that are not ``NOT_GIVEN``. - Skips ``NOT_GIVEN`` values and the ``extra`` field itself. Entries - from ``extra`` are included at the top level. + Primarily useful for delta-mode objects to inspect which fields were + set. For a store-mode object this returns all declared fields (since + none should be ``NOT_GIVEN``). + + Skips the ``extra`` field itself but merges its entries into the + returned dict at the top level. Returns: Dictionary mapping field names to their provided values. @@ -150,18 +189,18 @@ class ServiceSettings: result.update(self.extra) return result - def apply_update(self: _S, update: _S) -> Dict[str, Any]: - """Apply *update* onto this settings object, returning changed fields. + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + """Merge a delta-mode object into this store-mode object. - Only fields in *update* that are **given** (i.e. not ``NOT_GIVEN``) + Only fields in *delta* that are **given** (i.e. not ``NOT_GIVEN``) are considered. A field is "changed" if its new value differs from the current value. 
- The ``extra`` dicts are merged: keys present in the update overwrite + The ``extra`` dicts are merged: keys present in the delta overwrite keys in the target. Args: - update: A settings object of the same type containing the delta. + delta: A delta-mode settings object of the same type. Returns: A dict mapping each changed field name to its **pre-update** value. @@ -170,7 +209,9 @@ class ServiceSettings: Examples:: + # store-mode object (all fields given) current = TTSSettings(voice="alice", language="en") + # delta-mode object (only voice is set) delta = TTSSettings(voice="bob") changed = current.apply_update(delta) # changed == {"voice": "alice"} @@ -180,7 +221,7 @@ class ServiceSettings: for f in fields(self): if f.name == "extra": continue - new_val = getattr(update, f.name) + new_val = getattr(delta, f.name) if not is_given(new_val): continue old_val = getattr(self, f.name) @@ -189,7 +230,7 @@ class ServiceSettings: changed[f.name] = old_val # Merge extra - for key, new_val in update.extra.items(): + for key, new_val in delta.extra.items(): old_val = self.extra.get(key, NOT_GIVEN) if old_val != new_val: self.extra[key] = new_val @@ -199,10 +240,12 @@ class ServiceSettings: @classmethod def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: - """Construct a settings object from a plain dictionary. + """Build a **delta-mode** settings object from a plain dictionary. This exists for backward compatibility with code that passes plain - dicts via ``*UpdateSettingsFrame(settings={...})``. + dicts via ``*UpdateSettingsFrame(settings={...})``. The returned + object is a delta: only the keys present in *settings* are set; + all other fields remain ``NOT_GIVEN``. Keys are matched to dataclass fields by name. Keys listed in ``_aliases`` are translated to their canonical name first. Any @@ -212,13 +255,14 @@ class ServiceSettings: settings: A dictionary of setting names to values. Returns: - A new settings instance with the corresponding fields populated. 
+ A new delta-mode settings instance. Examples:: - update = TTSSettings.from_mapping({"voice_id": "alice", "speed": 1.2}) - # update.voice == "alice" (via alias) - # update.extra == {"speed": 1.2} + delta = TTSSettings.from_mapping({"voice_id": "alice", "speed": 1.2}) + # delta.voice == "alice" (via alias) + # delta.language is NOT_GIVEN (not in the dict) + # delta.extra == {"speed": 1.2} """ field_names = {f.name for f in fields(cls)} - {"extra"} kwargs: Dict[str, Any] = {} @@ -236,6 +280,31 @@ class ServiceSettings: instance.extra = extra return instance + def validate_complete(self) -> None: + """Check that this is a valid store-mode object (no ``NOT_GIVEN`` fields). + + Called automatically by ``AIService.start()`` to catch fields that a + service forgot to initialize in its ``__init__``. Can also be called + manually after constructing a store-mode settings object. + + Logs a single error listing all uninitialized fields. Failure to initialize + all fields may or may not cause runtime issues — it depends on + whether and how the service actually reads the field — but it indicates + a deviation from expectations and should be fixed. + """ + missing = [ + f.name + for f in fields(self) + if f.name != "extra" and isinstance(getattr(self, f.name), _NotGiven) + ] + if missing: + names = ", ".join(missing) + logger.error( + f"{type(self).__name__}: the following fields are NOT_GIVEN: {names}. " + f"All settings fields should be initialized in the service's " + f"__init__ (use None for unsupported fields)." + ) + def copy(self: _S) -> _S: """Return a deep copy of this settings instance. @@ -254,7 +323,12 @@ class ServiceSettings: class LLMSettings(ServiceSettings): """Runtime-updatable settings for LLM services. - See ``ServiceSettings`` for the general concept. + Used in both store and delta mode — see ``ServiceSettings``. + + These fields are common across LLM providers. Not every provider supports + every field; in store mode, set unsupported fields to ``None`` (e.g. 
a + service that doesn't support ``seed`` should initialize it as + ``seed=None``). Parameters: model: LLM model identifier. @@ -274,15 +348,15 @@ class LLMSettings(ServiceSettings): and prompts for incomplete turns. """ - temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - max_tokens: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - top_p: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - top_k: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - frequency_penalty: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - presence_penalty: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - seed: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - filter_incomplete_user_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - user_turn_completion_config: UserTurnCompletionConfig | _NotGiven = field( + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_tokens: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_k: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + frequency_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + presence_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + filter_incomplete_user_turns: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + user_turn_completion_config: UserTurnCompletionConfig | None | _NotGiven = field( default_factory=lambda: NOT_GIVEN ) @@ -291,23 +365,25 @@ class LLMSettings(ServiceSettings): class TTSSettings(ServiceSettings): """Runtime-updatable settings for TTS services. - See ``ServiceSettings`` for the general concept. + Used in both store and delta mode — see ``ServiceSettings``. 
+ + In store mode, set unsupported fields to ``None`` (e.g. ``language=None`` + if the service doesn't expose a language setting). Parameters: model: TTS model identifier. voice: Voice identifier or name. - language: Language for speech synthesis. The union type reflects the - *input* side: callers may pass a ``Language`` enum or a raw string. - However, the **stored** value is always a service-specific string - — ``TTSService._update_settings`` converts ``Language`` enums via - ``language_to_service_language()`` before writing, and ``__init__`` - methods do the same at construction time. Code that reads - ``self._settings.language`` after initialisation can treat it as - ``str``. + language: Language for speech synthesis. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string + in a delta. However, the **stored** value (in store mode) is + always a service-specific string or ``None`` — + ``TTSService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and + ``__init__`` methods do the same at construction time. """ voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} @@ -316,18 +392,20 @@ class TTSSettings(ServiceSettings): class STTSettings(ServiceSettings): """Runtime-updatable settings for STT services. - See ``ServiceSettings`` for the general concept. + Used in both store and delta mode — see ``ServiceSettings``. + + In store mode, set unsupported fields to ``None`` (e.g. ``language=None`` + if the service auto-detects language). Parameters: model: STT model identifier. - language: Language for speech recognition. The union type reflects the - *input* side: callers may pass a ``Language`` enum or a raw string. 
- However, the **stored** value is always a service-specific string - — ``STTService._update_settings`` converts ``Language`` enums via - ``language_to_service_language()`` before writing, and ``__init__`` - methods do the same at construction time. Code that reads - ``self._settings.language`` after initialisation can treat it as - ``str``. + language: Language for speech recognition. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string + in a delta. However, the **stored** value (in store mode) is + always a service-specific string or ``None`` — + ``STTService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and + ``__init__`` methods do the same at construction time. """ - language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 630e11862..356b23162 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -225,25 +225,25 @@ class SonioxSTTService(WebsocketSTTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: SonioxSTTSettings) -> dict[str, Any]: - """Apply a settings update, keeping ``input_params`` in sync. + async def _update_settings(self, delta: SonioxSTTSettings) -> dict[str, Any]: + """Apply a settings delta, keeping ``input_params`` in sync. Top-level ``model`` is the source of truth. When it is given in - *update* its value is propagated into ``input_params``. When only + *delta* its value is propagated into ``input_params``. When only ``input_params`` is given, its ``model`` is propagated *up* to the top-level field. Settings are stored but not applied to the active connection. Args: - update: A settings delta. + delta: A settings delta. 
Returns: Dict mapping changed field names to their previous values. """ - model_given = is_given(getattr(update, "model", NOT_GIVEN)) + model_given = is_given(getattr(delta, "model", NOT_GIVEN)) - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 4a476c01f..61bf8b69f 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import SPEECHMATICS_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -429,6 +429,7 @@ class SpeechmaticsSTTService(STTService): # Settings — seeded from InputParams self._settings = SpeechmaticsSTTSettings( + model=None, language=params.language, domain=params.domain, turn_detection_mode=params.turn_detection_mode, @@ -492,8 +493,8 @@ class SpeechmaticsSTTService(STTService): await super().start(frame) await self._connect() - async def _update_settings(self, update: SpeechmaticsSTTSettings) -> dict[str, Any]: - """Apply settings update, reconnecting only when necessary. + async def _update_settings(self, delta: SpeechmaticsSTTSettings) -> dict[str, Any]: + """Apply settings delta, reconnecting only when necessary. Fields are classified into three categories (see ``SpeechmaticsSTTSettings``): @@ -506,12 +507,12 @@ class SpeechmaticsSTTService(STTService): time and therefore require a full disconnect / reconnect. Args: - update: A settings delta. + delta: A settings delta. 
Returns: Dict mapping changed field names to their previous values. """ - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if not changed: return changed @@ -674,21 +675,21 @@ class SpeechmaticsSTTService(STTService): # Language + domain language = s.language config.language = self._language_to_speechmatics_language(language) - config.domain = s.domain if is_given(s.domain) else None + config.domain = s.domain if s.domain is not None else None config.output_locale = self._locale_to_speechmatics_locale(config.language, language) # Speaker config config.speaker_config = SpeakerFocusConfig( - focus_speakers=s.focus_speakers if is_given(s.focus_speakers) else [], - ignore_speakers=s.ignore_speakers if is_given(s.ignore_speakers) else [], - focus_mode=s.focus_mode if is_given(s.focus_mode) else SpeakerFocusMode.RETAIN, + focus_speakers=s.focus_speakers if s.focus_speakers is not None else [], + ignore_speakers=s.ignore_speakers if s.ignore_speakers is not None else [], + focus_mode=s.focus_mode if s.focus_mode is not None else SpeakerFocusMode.RETAIN, ) - config.known_speakers = s.known_speakers if is_given(s.known_speakers) else [] + config.known_speakers = s.known_speakers if s.known_speakers is not None else [] # Custom dictionary - config.additional_vocab = s.additional_vocab if is_given(s.additional_vocab) else [] + config.additional_vocab = s.additional_vocab if s.additional_vocab is not None else [] - # Advanced parameters — only set if given (not NOT_GIVEN or None) + # Advanced parameters — only set if not None for param in [ "operating_point", "max_delay", @@ -703,17 +704,17 @@ class SpeechmaticsSTTService(STTService): "prefer_current_speaker", ]: val = getattr(s, param) - if is_given(val) and val is not None: + if val is not None: setattr(config, param, val) # Extra parameters - if is_given(s.extra_params) and isinstance(s.extra_params, dict): + if isinstance(s.extra_params, dict): for key, value in 
s.extra_params.items(): if hasattr(config, key): setattr(config, key, value) # Enable sentences - split = s.split_sentences if is_given(s.split_sentences) else False + split = s.split_sentences if s.split_sentences is not None else False config.speech_segment_config = SpeechSegmentConfig(emit_sentences=split or False) return config diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 7c8d9fca5..32fb0c2b3 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -108,7 +108,9 @@ class SpeechmaticsTTSService(TTSService): params = params or SpeechmaticsTTSService.InputParams() self._settings = SpeechmaticsTTSSettings( + model=None, voice=voice_id, + language=None, max_retries=params.max_retries, ) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index dfdae6de6..80448223c 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -262,8 +262,8 @@ class STTService(AIService): await self._cancel_ttfb_timeout() await self._cancel_keepalive_task() - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply an STT settings update. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply an STT settings delta. Handles ``model`` (via parent). Translates ``Language`` enum values before applying so the stored value is a service-specific string. @@ -272,18 +272,18 @@ class STTService(AIService): changed-field dict. Args: - update: An STT settings delta. + delta: An STT settings delta. Returns: Dict mapping changed field names to their previous values. 
""" # Translate language *before* applying so the stored value is canonical - if is_given(update.language) and isinstance(update.language, Language): - converted = self.language_to_service_language(update.language) + if is_given(delta.language) and isinstance(delta.language, Language): + converted = self.language_to_service_language(delta.language) if converted is not None: - update.language = converted + delta.language = converted - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) return changed async def process_audio_frame(self, frame: AudioRawFrame, direction: FrameDirection): @@ -349,20 +349,20 @@ class STTService(AIService): await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, STTUpdateSettingsFrame): - if frame.update is not None: - await self._update_settings(frame.update) + if frame.delta is not None: + await self._update_settings(frame.delta) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. 
with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( "Passing a dict via STTUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use STTUpdateSettingsFrame(update=STTSettings(...)) instead.", + "since 0.0.103, use STTUpdateSettingsFrame(delta=STTSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) - update = type(self._settings).from_mapping(frame.settings) - await self._update_settings(update) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, STTMuteFrame): self._muted = frame.mute logger.debug(f"STT service {'muted' if frame.mute else 'unmuted'}") diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index e7b57833d..59920c342 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -196,9 +196,7 @@ class TTSService(AIService): self._append_trailing_space: bool = append_trailing_space self._init_sample_rate = sample_rate self._sample_rate = 0 - self._settings = TTSSettings( - voice="" - ) # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) + self._settings = TTSSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: import warnings @@ -440,24 +438,24 @@ class TTSService(AIService): if not (agg_type == aggregation_type and func == transform_function) ] - async def _update_settings(self, update: TTSSettings) -> dict[str, Any]: - """Apply a TTS settings update. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a TTS settings delta. Translates language to service-specific value before applying. Args: - update: A TTS settings delta. + delta: A TTS settings delta. Returns: Dict mapping changed field names to their previous values. 
""" # Translate language *before* applying so the stored value is canonical - if is_given(update.language) and isinstance(update.language, Language): - converted = self.language_to_service_language(update.language) + if is_given(delta.language) and isinstance(delta.language, Language): + converted = self.language_to_service_language(delta.language) if converted is not None: - update.language = converted + delta.language = converted - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) return changed @@ -548,20 +546,20 @@ class TTSService(AIService): await self.flush_audio() self._processing_text = processing_text elif isinstance(frame, TTSUpdateSettingsFrame): - if frame.update is not None: - await self._update_settings(frame.update) + if frame.delta is not None: + await self._update_settings(frame.delta) elif frame.settings: # Backward-compatible path: convert legacy dict to settings object. with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( "Passing a dict via TTSUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use TTSUpdateSettingsFrame(update=TTSSettings(...)) instead.", + "since 0.0.103, use TTSUpdateSettingsFrame(delta=TTSSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) - update = type(self._settings).from_mapping(frame.settings) - await self._update_settings(update) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, BotStoppedSpeakingFrame): await self._maybe_resume_frame_processing() await self.push_frame(frame, direction) diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 436653c7e..11525258a 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -177,7 +177,19 @@ class UltravoxRealtimeLLMService(LLMService): **kwargs: Additional arguments passed to parent LLMService. 
""" super().__init__(**kwargs) - self._settings = UltravoxRealtimeLLMSettings() + self._settings = UltravoxRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + output_medium=None, + ) self._params = params if one_shot_selected_tools: if not isinstance(self._params, OneShotInputParams): @@ -325,8 +337,8 @@ class UltravoxRealtimeLLMService(LLMService): await self.cancel_task(self._receive_task, timeout=1.0) self._receive_task = None - async def _update_settings(self, update: UltravoxRealtimeLLMSettings): - changed = await super()._update_settings(update) + async def _update_settings(self, delta: UltravoxRealtimeLLMSettings): + changed = await super()._update_settings(delta) if "output_medium" in changed: await self._update_output_medium(self._settings.output_medium) self._warn_unhandled_updated_settings(changed.keys() - {"output_medium"}) diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 9def3c2f1..9d2b3ab51 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -174,13 +174,13 @@ class BaseWhisperSTTService(SegmentedSTTService): def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) - async def _update_settings(self, update: STTSettings) -> dict[str, Any]: - """Apply a settings update, syncing instance variables. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, syncing instance variables. Keeps ``_language``, ``_prompt``, and ``_temperature`` in sync with the settings fields. 
""" - changed = await super()._update_settings(update) + changed = await super()._update_settings(delta) if "language" in changed: self._language = self._settings.language diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index ba2eb4fc2..ab06ffb5a 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -114,6 +114,7 @@ class XTTSService(TTSService): super().__init__(sample_rate=sample_rate, **kwargs) self._settings = XTTSTTSSettings( + model=None, voice=voice_id, language=self.language_to_service_language(language), base_url=base_url, From 8c9ccf8f82aa83ed6f877f14eba3dfab0a30cfd5 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 11:47:29 -0500 Subject: [PATCH 082/189] Bump various deprecation messages from mentioning version 0.0.103 to 0.0.104 --- src/pipecat/frames/frames.py | 2 +- src/pipecat/services/google/stt.py | 2 +- src/pipecat/services/hume/tts.py | 2 +- src/pipecat/services/llm_service.py | 2 +- src/pipecat/services/nvidia/stt.py | 2 +- src/pipecat/services/nvidia/tts.py | 2 +- src/pipecat/services/stt_service.py | 6 +++--- src/pipecat/services/tts_service.py | 6 +++--- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 0d6b3f18a..c69ddc931 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2127,7 +2127,7 @@ class ServiceUpdateSettingsFrame(ControlFrame): Parameters: settings: Dictionary of setting name to value mappings. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``delta`` with a typed settings object instead. 
delta: :class:`~pipecat.services.settings.ServiceSettings` delta-mode diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index e294be20a..ac3afa7a3 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -368,7 +368,7 @@ class GoogleSTTSettings(STTSettings): language_codes: List of Google STT language code strings (e.g. ``["en-US"]``). - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``languages`` instead. If both are provided, ``languages`` takes precedence. This field is here just for backward compatibility with dict-based settings updates. diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 4b13226cc..3fb43ff88 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -210,7 +210,7 @@ class HumeTTSService(TTSService): async def update_setting(self, key: str, value: Any) -> None: """Runtime updates via key/value pair. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``TTSUpdateSettingsFrame(delta=HumeTTSSettings(...))`` instead. 
Args: diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index df1bc6d08..1102e85a1 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -359,7 +359,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): warnings.simplefilter("always") warnings.warn( "Passing a dict via LLMUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use LLMUpdateSettingsFrame(delta=LLMSettings(...)) instead.", + "since 0.0.104, use LLMUpdateSettingsFrame(delta=LLMSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index be9002b14..3bbe04f51 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -241,7 +241,7 @@ class NvidiaSTTService(STTService): async def set_model(self, model: str): """Set the ASR model for transcription. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Model cannot be changed after initialization for NVIDIA Riva streaming STT. Set model and function id in the constructor instead, e.g.:: diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 12bcf8c21..c6a5f371e 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -125,7 +125,7 @@ class NvidiaTTSService(TTSService): async def set_model(self, model: str): """Set the TTS model. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Model cannot be changed after initialization for NVIDIA Riva TTS. Set model and function id in the constructor instead, e.g.:: diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 80448223c..eedc8b46d 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -184,7 +184,7 @@ class STTService(AIService): async def set_model(self, model: str): """Set the speech recognition model. - .. deprecated:: 0.0.103 + .. 
deprecated:: 0.0.104 Use ``STTUpdateSettingsFrame(model=...)`` instead. Args: @@ -204,7 +204,7 @@ class STTService(AIService): async def set_language(self, language: Language): """Set the language for speech recognition. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``STTUpdateSettingsFrame(language=...)`` instead. Args: @@ -357,7 +357,7 @@ class STTService(AIService): warnings.simplefilter("always") warnings.warn( "Passing a dict via STTUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use STTUpdateSettingsFrame(delta=STTSettings(...)) instead.", + "since 0.0.104, use STTUpdateSettingsFrame(delta=STTSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 59920c342..1b65521a1 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -275,7 +275,7 @@ class TTSService(AIService): async def set_model(self, model: str): """Set the TTS model to use. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``TTSUpdateSettingsFrame(model=...)`` instead. Args: @@ -295,7 +295,7 @@ class TTSService(AIService): async def set_voice(self, voice: str): """Set the voice for speech synthesis. - .. deprecated:: 0.0.103 + .. deprecated:: 0.0.104 Use ``TTSUpdateSettingsFrame(voice=...)`` instead. 
Args: @@ -554,7 +554,7 @@ class TTSService(AIService): warnings.simplefilter("always") warnings.warn( "Passing a dict via TTSUpdateSettingsFrame(settings={...}) is deprecated " - "since 0.0.103, use TTSUpdateSettingsFrame(delta=TTSSettings(...)) instead.", + "since 0.0.104, use TTSUpdateSettingsFrame(delta=TTSSettings(...)) instead.", DeprecationWarning, stacklevel=2, ) From 0a89d24f70f25f28b5d89d9529b48b6275ec22e1 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 12:00:23 -0500 Subject: [PATCH 083/189] Update some more services to ensure that there are no un-initialized fields in `self._settings` --- src/pipecat/services/resembleai/tts.py | 1 + src/pipecat/services/sarvam/tts.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index b1b8d1de8..026d29d3f 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -102,6 +102,7 @@ class ResembleAITTSService(AudioContextTTSService): self._settings = ResembleAITTSSettings( model=None, voice=voice_id, + language=None, precision=precision, output_format=output_format, resemble_sample_rate=sample_rate, diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 45c283ff1..ade547798 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -42,7 +42,7 @@ import base64 import json from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple +from typing import Any, AsyncGenerator, ClassVar, Dict, List, Optional, Tuple import aiohttp from loguru import logger @@ -280,7 +280,8 @@ class SarvamTTSSettings(TTSSettings): """Settings for Sarvam WebSocket TTS service. Parameters: - target_language_code: Sarvam language code. + language: Sarvam language code (e.g. ``"hi-IN"``). Uses the standard + ``TTSSettings.language`` field. 
speech_sample_rate: Audio sample rate as string. enable_preprocessing: Enable text preprocessing. Defaults to False. **Note:** Always enabled for bulbul:v3-beta. @@ -304,7 +305,8 @@ class SarvamTTSSettings(TTSSettings): **Note:** Only supported for bulbul:v3-beta. Ignored for v2. """ - target_language_code: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + _aliases: ClassVar[Dict[str, str]] = {"target_language_code": "language"} + speech_sample_rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -840,7 +842,7 @@ class SarvamTTSService(InterruptibleTTSService): # Build base settings self._settings = SarvamTTSSettings( - target_language_code=( + language=( self.language_to_service_language(params.language) if params.language else "en-IN" ), speech_sample_rate=str(sample_rate), @@ -1022,7 +1024,7 @@ class SarvamTTSService(InterruptibleTTSService): raise Exception("WebSocket not connected") # Build config dict for the API config_data = { - "target_language_code": self._settings.target_language_code, + "target_language_code": self._settings.language, "speaker": self._settings.voice, "speech_sample_rate": self._settings.speech_sample_rate, "enable_preprocessing": self._settings.enable_preprocessing, From b78a293ffb70adae599a98196552d6721cf0db09 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 12:20:14 -0500 Subject: [PATCH 084/189] Flatten `input_params` into individual fields on `SonioxSTTSettings` and `GladiaSTTSettings` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes each service-specific field individually visible to the delta/update mechanism (`apply_update`, `given_fields`) and removes the need for complex sync logic between `input_params` and top-level fields like `model`. 
- Soniox: replace `input_params: SonioxInputParams` with 8 individual fields, simplify `_update_settings` by removing model sync logic, remove unused `is_given` import - Gladia: replace `input_params: GladiaInputParams` with 11 individual fields, resolve deprecated `language` → `language_config` at init time rather than at `_prepare_settings` time --- src/pipecat/services/gladia/stt.py | 113 ++++++++++++++++++++--------- src/pipecat/services/soniox/stt.py | 72 ++++++++++-------- 2 files changed, 120 insertions(+), 65 deletions(-) diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index d0a6f5a84..c1ce02d87 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -32,7 +32,13 @@ from pipecat.frames.frames import ( UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) -from pipecat.services.gladia.config import GladiaInputParams +from pipecat.services.gladia.config import ( + GladiaInputParams, + LanguageConfig, + MessagesConfig, + PreProcessingConfig, + RealtimeProcessingConfig, +) from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GLADIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService @@ -185,10 +191,36 @@ class GladiaSTTSettings(STTSettings): """Settings for Gladia STT service. Parameters: - input_params: Gladia ``GladiaInputParams`` for detailed configuration. + encoding: Audio encoding format. + bit_depth: Audio bit depth. + channels: Number of audio channels. + custom_metadata: Additional metadata to include with requests. + endpointing: Silence duration in seconds to mark end of speech. + maximum_duration_without_endpointing: Maximum utterance duration without silence. + language_config: Detailed language configuration. + pre_processing: Audio pre-processing options. + realtime_processing: Real-time processing features. + messages_config: WebSocket message filtering options. 
+ enable_vad: Enable VAD to trigger end of utterance detection. """ - input_params: GladiaInputParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + bit_depth: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + custom_metadata: Dict[str, Any] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + endpointing: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + maximum_duration_without_endpointing: int | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + language_config: LanguageConfig | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pre_processing: PreProcessingConfig | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + realtime_processing: RealtimeProcessingConfig | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + messages_config: MessagesConfig | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_vad: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GladiaSTTService(WebsocketSTTService): @@ -280,7 +312,29 @@ class GladiaSTTService(WebsocketSTTService): self._region = region self._url = url self._receive_task = None - self._settings = GladiaSTTSettings(model=model, language=None, input_params=params) + + # Resolve deprecated language → language_config at init time + language_config = params.language_config + if not language_config and params.language: + language_code = self.language_to_service_language(params.language) + if language_code: + language_config = LanguageConfig(languages=[language_code], code_switching=False) + + self._settings = GladiaSTTSettings( + model=model, + language=None, + encoding=params.encoding, + bit_depth=params.bit_depth, + channels=params.channels, + custom_metadata=params.custom_metadata, + endpointing=params.endpointing, 
+ maximum_duration_without_endpointing=params.maximum_duration_without_endpointing, + language_config=language_config, + pre_processing=params.pre_processing, + realtime_processing=params.realtime_processing, + messages_config=params.messages_config, + enable_vad=params.enable_vad, + ) self._sync_model_name_to_metrics() # Session management @@ -321,52 +375,43 @@ class GladiaSTTService(WebsocketSTTService): return language_to_gladia_language(language) def _prepare_settings(self) -> Dict[str, Any]: - params = self._settings.input_params + s = self._settings settings = { - "encoding": params.encoding or "wav/pcm", - "bit_depth": params.bit_depth or 16, + "encoding": s.encoding or "wav/pcm", + "bit_depth": s.bit_depth or 16, "sample_rate": self.sample_rate, - "channels": params.channels or 1, - "model": self._settings.model, + "channels": s.channels or 1, + "model": s.model, } # Add custom_metadata if provided - settings["custom_metadata"] = dict(params.custom_metadata or {}) + settings["custom_metadata"] = dict(s.custom_metadata or {}) settings["custom_metadata"]["pipecat"] = pipecat_version() # Add endpointing parameters if provided - if params.endpointing is not None: - settings["endpointing"] = params.endpointing - if params.maximum_duration_without_endpointing is not None: + if s.endpointing is not None: + settings["endpointing"] = s.endpointing + if s.maximum_duration_without_endpointing is not None: settings["maximum_duration_without_endpointing"] = ( - params.maximum_duration_without_endpointing + s.maximum_duration_without_endpointing ) - # Add language configuration (prioritize language_config over deprecated language) - if params.language_config: - settings["language_config"] = params.language_config.model_dump(exclude_none=True) - elif params.language: # Backward compatibility for deprecated parameter - language_code = self.language_to_service_language(params.language) - if language_code: - settings["language_config"] = { - "languages": [language_code], - 
"code_switching": False, - } + # Add language configuration + if s.language_config: + settings["language_config"] = s.language_config.model_dump(exclude_none=True) # Add pre_processing configuration if provided - if params.pre_processing: - settings["pre_processing"] = params.pre_processing.model_dump(exclude_none=True) + if s.pre_processing: + settings["pre_processing"] = s.pre_processing.model_dump(exclude_none=True) # Add realtime_processing configuration if provided - if params.realtime_processing: - settings["realtime_processing"] = params.realtime_processing.model_dump( - exclude_none=True - ) + if s.realtime_processing: + settings["realtime_processing"] = s.realtime_processing.model_dump(exclude_none=True) # Add messages_config if provided - if params.messages_config: - settings["messages_config"] = params.messages_config.model_dump(exclude_none=True) + if s.messages_config: + settings["messages_config"] = s.messages_config.model_dump(exclude_none=True) return settings @@ -562,7 +607,7 @@ class GladiaSTTService(WebsocketSTTService): Broadcasts UserStartedSpeakingFrame and optionally triggers interruption when VAD is enabled. """ - if not self._settings.input_params.enable_vad or self._is_speaking: + if not self._settings.enable_vad or self._is_speaking: return logger.debug(f"{self} User started speaking") @@ -577,7 +622,7 @@ class GladiaSTTService(WebsocketSTTService): Broadcasts UserStoppedSpeakingFrame when VAD is enabled. 
""" - if not self._settings.input_params.enable_vad or not self._is_speaking: + if not self._settings.enable_vad or not self._is_speaking: return self._is_speaking = False await self.broadcast_frame(UserStoppedSpeakingFrame) diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 356b23162..3160d19a6 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -24,7 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import SONIOX_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -141,10 +141,28 @@ class SonioxSTTSettings(STTSettings): """Settings for Soniox STT service. Parameters: - input_params: Soniox ``SonioxInputParams`` for detailed configuration. + audio_format: Audio format to use for transcription. + num_channels: Number of channels to use for transcription. + language_hints: List of language hints to use for transcription. + language_hints_strict: If true, strictly enforce language hints. + context: Customization for transcription. String for models with + context_version 1 and SonioxContextObject for models with + context_version 2. + enable_speaker_diarization: Whether to enable speaker diarization. + enable_language_identification: Whether to enable language identification. + client_reference_id: Client reference ID to use for transcription. 
""" - input_params: SonioxInputParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_format: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + num_channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_hints: List[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_hints_strict: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + context: SonioxContextObject | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_speaker_diarization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_language_identification: bool | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + client_reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class SonioxSTTService(WebsocketSTTService): @@ -199,7 +217,15 @@ class SonioxSTTService(WebsocketSTTService): self._settings = SonioxSTTSettings( model=params.model, - input_params=params, + language=None, + audio_format=params.audio_format, + num_channels=params.num_channels, + language_hints=params.language_hints, + language_hints_strict=params.language_hints_strict, + context=params.context, + enable_speaker_diarization=params.enable_speaker_diarization, + enable_language_identification=params.enable_language_identification, + client_reference_id=params.client_reference_id, ) self._sync_model_name_to_metrics() @@ -226,12 +252,7 @@ class SonioxSTTService(WebsocketSTTService): await self._connect() async def _update_settings(self, delta: SonioxSTTSettings) -> dict[str, Any]: - """Apply a settings delta, keeping ``input_params`` in sync. - - Top-level ``model`` is the source of truth. When it is given in - *delta* its value is propagated into ``input_params``. When only - ``input_params`` is given, its ``model`` is propagated *up* to the - top-level field. + """Apply settings delta. Settings are stored but not applied to the active connection. 
@@ -241,22 +262,11 @@ class SonioxSTTService(WebsocketSTTService): Returns: Dict mapping changed field names to their previous values. """ - model_given = is_given(getattr(delta, "model", NOT_GIVEN)) - changed = await super()._update_settings(delta) if not changed: return changed - # --- Sync model -------------------------------------------------- - if model_given: - # Top-level model wins → push into input_params. - self._settings.input_params.model = self._settings.model - elif "input_params" in changed and self._settings.input_params.model is not None: - # Only input_params was given → pull model up. - self._settings.model = self._settings.input_params.model - self._sync_model_name_to_metrics() - # TODO: someday we could reconnect here to apply updated settings. # Code might look something like the below: # await self._disconnect() @@ -377,26 +387,26 @@ class SonioxSTTService(WebsocketSTTService): # Either one or the other is required. enable_endpoint_detection = not self._vad_force_turn_endpoint - params = self._settings.input_params + s = self._settings - context = params.context + context = s.context if isinstance(context, SonioxContextObject): context = context.model_dump() # Send the initial configuration message. 
config = { "api_key": self._api_key, - "model": self._settings.model, - "audio_format": params.audio_format, - "num_channels": params.num_channels or 1, + "model": s.model, + "audio_format": s.audio_format, + "num_channels": s.num_channels or 1, "enable_endpoint_detection": enable_endpoint_detection, "sample_rate": self.sample_rate, - "language_hints": _prepare_language_hints(params.language_hints), - "language_hints_strict": params.language_hints_strict, + "language_hints": _prepare_language_hints(s.language_hints), + "language_hints_strict": s.language_hints_strict, "context": context, - "enable_speaker_diarization": params.enable_speaker_diarization, - "enable_language_identification": params.enable_language_identification, - "client_reference_id": params.client_reference_id, + "enable_speaker_diarization": s.enable_speaker_diarization, + "enable_language_identification": s.enable_language_identification, + "client_reference_id": s.client_reference_id, } # Send the configuration message. From b4b9976b9c3adf94a596ec573cbd23a1ce2d0593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 24 Feb 2026 11:26:34 -0800 Subject: [PATCH 085/189] Fix SentryMetrics method signatures to match base class Update start_ttfb_metrics, stop_ttfb_metrics, start_processing_metrics, and stop_processing_metrics to accept start_time/end_time keyword arguments matching the updated FrameProcessorMetrics signatures. 
Closes #3808 --- changelog/3808.fixed.md | 1 + src/pipecat/processors/metrics/sentry.py | 31 +++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 changelog/3808.fixed.md diff --git a/changelog/3808.fixed.md b/changelog/3808.fixed.md new file mode 100644 index 000000000..6bf105bf6 --- /dev/null +++ b/changelog/3808.fixed.md @@ -0,0 +1 @@ +- Fixed `SentryMetrics` method signatures to match updated `FrameProcessorMetrics` base class, resolving `TypeError` when using `start_time`/`end_time` keyword arguments. diff --git a/src/pipecat/processors/metrics/sentry.py b/src/pipecat/processors/metrics/sentry.py index db2c6de63..c865ee470 100644 --- a/src/pipecat/processors/metrics/sentry.py +++ b/src/pipecat/processors/metrics/sentry.py @@ -7,6 +7,7 @@ """Sentry integration for frame processor metrics.""" import asyncio +from typing import Optional from loguru import logger @@ -70,13 +71,18 @@ class SentryMetrics(FrameProcessorMetrics): logger.trace(f"{self} Flushing Sentry metrics") sentry_sdk.flush(timeout=5.0) - async def start_ttfb_metrics(self, report_only_initial_ttfb): + async def start_ttfb_metrics( + self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool + ): """Start tracking time-to-first-byte metrics. Args: + start_time: Optional start timestamp override. report_only_initial_ttfb: Whether to report only the initial TTFB measurement. 
""" - await super().start_ttfb_metrics(report_only_initial_ttfb) + await super().start_ttfb_metrics( + start_time=start_time, report_only_initial_ttfb=report_only_initial_ttfb + ) if self._should_report_ttfb and self._sentry_available: self._ttfb_metrics_tx = sentry_sdk.start_transaction( @@ -87,23 +93,25 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._ttfb_metrics_tx.span_id} Name: {self._ttfb_metrics_tx.name})" ) - async def stop_ttfb_metrics(self): + async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None): """Stop tracking time-to-first-byte metrics. - Queues the TTFB transaction for completion and transmission to Sentry. + Args: + end_time: Optional end timestamp override. """ - await super().stop_ttfb_metrics() + await super().stop_ttfb_metrics(end_time=end_time) if self._sentry_available and self._ttfb_metrics_tx: await self._sentry_queue.put(self._ttfb_metrics_tx) self._ttfb_metrics_tx = None - async def start_processing_metrics(self): + async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start tracking frame processing metrics. - Creates a new Sentry transaction to track processing performance. + Args: + start_time: Optional start timestamp override. """ - await super().start_processing_metrics() + await super().start_processing_metrics(start_time=start_time) if self._sentry_available: self._processing_metrics_tx = sentry_sdk.start_transaction( @@ -114,12 +122,13 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._processing_metrics_tx.span_id} Name: {self._processing_metrics_tx.name})" ) - async def stop_processing_metrics(self): + async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop tracking frame processing metrics. - Queues the processing transaction for completion and transmission to Sentry. + Args: + end_time: Optional end timestamp override. 
""" - await super().stop_processing_metrics() + await super().stop_processing_metrics(end_time=end_time) if self._sentry_available and self._processing_metrics_tx: await self._sentry_queue.put(self._processing_metrics_tx) From ee46cbce4c626b3ad1f4bab81a4c26d12c06bbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 24 Feb 2026 11:38:04 -0800 Subject: [PATCH 086/189] Move skills to pipecat-ai/skills repo, add README instructions Remove bundled Claude Code skills (changelog, cleanup, code-review, docstring, pr-description, pr-submit) that now live in https://github.com/pipecat-ai/skills. Add a section to the README with installation instructions. The update-docs skill remains as it is specific to this repository. --- .claude/skills/changelog/SKILL.md | 47 ---- .claude/skills/cleanup/SKILL.md | 306 ------------------------- .claude/skills/code-review/SKILL.md | 107 --------- .claude/skills/docstring/SKILL.md | 257 --------------------- .claude/skills/pr-description/SKILL.md | 128 ----------- .claude/skills/pr-submit/SKILL.md | 28 --- README.md | 16 ++ 7 files changed, 16 insertions(+), 873 deletions(-) delete mode 100644 .claude/skills/changelog/SKILL.md delete mode 100644 .claude/skills/cleanup/SKILL.md delete mode 100644 .claude/skills/code-review/SKILL.md delete mode 100644 .claude/skills/docstring/SKILL.md delete mode 100644 .claude/skills/pr-description/SKILL.md delete mode 100644 .claude/skills/pr-submit/SKILL.md diff --git a/.claude/skills/changelog/SKILL.md b/.claude/skills/changelog/SKILL.md deleted file mode 100644 index 1ef8f324e..000000000 --- a/.claude/skills/changelog/SKILL.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -name: changelog -description: Create changelog files for important commits in a PR ---- - -Create changelog files for the important commits in this PR. The PR number is provided as an argument. - -## Instructions - -1. Skip changelog for: documentation-only, internal refactoring, test-only, CI changes. - -2. 
First, check what commits are on the current branch compared to main: - ``` - git log main..HEAD --oneline - ``` - -3. For each significant change, create a changelog file in the `changelog/` folder using the format: - Allowed types: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`, `performance`, `other` - - `{PR_NUMBER}.added.md` - for new features - - `{PR_NUMBER}.added.2.md`, `{PR_NUMBER}.added.3.md` - for additional entries of the same type - - `{PR_NUMBER}.changed.md` - for changes to existing functionality - - `{PR_NUMBER}.fixed.md` - for bug fixes - - `{PR_NUMBER}.deprecated.md` - for deprecations - - `{PR_NUMBER}.removed.md` - for removed features - - `{PR_NUMBER}.security.md` - for security fixes - - `{PR_NUMBER}.performance.md` - for performance improvements - - `{PR_NUMBER}.other.md` - for other changes - -4. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change. No line wrapping. - -5. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples. - -6. Use ⚠️ emoji prefix for breaking changes. - -## Example - -For PR #3519 with a new feature and a bug fix: - -`changelog/3519.added.md`: -``` -- Added `SomeNewFeature` for doing something useful. -``` - -`changelog/3519.fixed.md`: -``` -- Fixed an issue where something was not working correctly. -``` diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md deleted file mode 100644 index 48c5e0ee8..000000000 --- a/.claude/skills/cleanup/SKILL.md +++ /dev/null @@ -1,306 +0,0 @@ -# Code Cleanup Skill - -The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat’s architecture, coding standards, and example patterns**. -It focuses on **readability, correctness, performance, and consistency**, while avoiding breaking changes. 
- ---- - -## Skill Overview - -This skill analyzes all changes introduced in your branch and performs the following actions: - -1. **Analyze Branch Changes** - - Review uncommitted changes and outgoing commits -2. **Refactor for Readability** - - Improve clarity, naming, structure, and modern Python usage -3. **Enhance Performance** - - Identify safe, conservative optimization opportunities -4. **Add Documentation** - - Apply Pipecat-style, Google-format docstrings -5. **Ensure Pattern Consistency** - - Match existing Pipecat services, pipelines, and examples -6. **Validate Examples** - - Ensure examples follow foundational patterns (e.g. `07-interruptible.py`) - ---- - -## Usage - -Invoke the skill using any of the following commands: - -- “Clean up my branch code” -- “Refactor the changes in my branch” -- “Review and improve my branch code” -- `/cleanup` - ---- - -## What This Skill Does - -### 1. Analyze Branch Changes - -The skill retrieves all uncommitted changes and outgoing commits to understand: - -- New files added -- Modified files -- Code additions and deletions -- Overall scope and intent of changes - ---- - -### 2. Code Refactoring - -#### Readability Improvements - -- Replace tuples with named classes or dataclasses -- Improve variable, method, and class naming -- Extract complex logic into well-named helper methods -- Add missing type hints -- Simplify nested or complex conditionals -- Replace deprecated methods and features -- Normalize formatting to match Pipecat style - -#### Performance Enhancements - -- Identify inefficient loops or repeated work -- Suggest appropriate data structures -- Optimize async workflows and I/O -- Remove redundant operations - -> Performance changes are conservative and non-breaking. - ---- - -### 3. Documentation - -Documentation follows **Google-style docstrings**, consistent with Pipecat conventions. - -#### Class Documentation - -```python -class ExampleService: - """Brief one-line description. 
- - Detailed explanation of the class purpose, responsibilities, - and important behaviors. - - Supported features: - - - Feature 1 - - Feature 2 - - Feature 3 - """ -``` - -#### Method Documentation - -```python -def process_data(self, data: str, options: Optional[dict] = None) -> bool: - """Process incoming data with optional configuration. - - Args: - data: The input data to process. - options: Optional configuration dictionary. - - Returns: - True if processing succeeded, False otherwise. - - Raises: - ValueError: If data is empty or invalid. - """ -``` - -#### Pydantic Model Parameters - -```python -class InputParams(BaseModel): - """Configuration parameters for the service. - - Parameters: - timeout: Request timeout in seconds. - retry_count: Number of retry attempts. - enable_logging: Whether to enable debug logging. - """ - - timeout: Optional[float] = None - retry_count: int = 3 - enable_logging: bool = False -``` - ---- - -### 4. Pattern Consistency Checks - -#### Service Classes - -- Correct inheritance (`TTSService`, `STTService`, `LLMService`) -- Consistent constructor signatures -- Frame emission patterns -- Metrics support: - - `can_generate_metrics()` - - TTFB metrics - - Usage metrics -- Alignment with similar existing services - -#### Examples - -Validated against `examples/foundational/07-interruptible.py`: - -- Proper `create_transport()` usage -- Correct pipeline structure -- Task setup and observers -- Event handler registration -- Runner and bot entrypoint consistency - ---- - -### 5. 
Specific Implementation Patterns - -#### Service Implementation - -```python -class ExampleTTSService(TTSService): - - def __init__(self, *, api_key: Optional[str] = None, **kwargs): - super().__init__(**kwargs) - self._api_key = api_key or os.getenv("SERVICE_API_KEY") - - def can_generate_metrics(self) -> bool: - return True - - async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: - try: - await self.start_ttfb_metrics() - yield TTSStartedFrame() - # ... processing ... - yield TTSAudioRawFrame(...) - finally: - await self.stop_ttfb_metrics() -``` - ---- - -#### Example Structure Pattern - -```python -transport_params = { - "daily": lambda: DailyParams(...), - "twilio": lambda: FastAPIWebsocketParams(...), - "webrtc": lambda: TransportParams(...), -} - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - stt = DeepgramSTTService(...) - tts = SomeTTSService(...) - llm = OpenAILLMService(...) - - context = LLMContext(messages) - user_aggregator, assistant_aggregator = LLMContextAggregatorPair(...) - - pipeline = Pipeline([...]) - task = PipelineTask(pipeline, params=..., observers=[...]) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - await task.queue_frames([LLMRunFrame()]) - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - await runner.run(task) - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) -``` - ---- - -## Execution Flow - -1. Fetch uncommitted and outgoing changes -2. Categorize files (services, examples, tests, utilities) -3. Analyze each file: - - Readability - - Performance - - Documentation - - Pattern consistency -4. Generate actionable recommendations -5. 
Apply Pipecat standards - ---- - -## Examples - -### Before: Tuple Usage - -```python -def get_audio_info(self) -> Tuple[int, int]: - return (48000, 1) -``` - -### After: Named Class - -```python -class AudioInfo: - """Audio configuration information. - - Parameters: - sample_rate: Sample rate in Hz. - num_channels: Number of audio channels. - """ - - sample_rate: int - num_channels: int - -def get_audio_info(self) -> AudioInfo: - return AudioInfo(sample_rate=48000, num_channels=1) -``` - ---- - -### Before: Missing Documentation - -```python -class NewTTSService(TTSService): - def __init__(self, api_key: str, voice: str): - self._api_key = api_key - self._voice = voice -``` - -### After: Fully Documented - -```python -class NewTTSService(TTSService): - """Text-to-speech service using NewProvider API. - - Streams PCM audio and emits TTSAudioRawFrame frames compatible - with Pipecat transports. - - Supported features: - - Text-to-speech synthesis - - Streaming PCM audio - - Voice customization - - TTFB metrics - """ - - def __init__(self, *, api_key: str, voice: str, **kwargs): - """Initialize the NewTTSService. - - Args: - api_key: API key for authentication. - voice: Voice identifier to use. - **kwargs: Additional arguments passed to the parent service. 
- """ - super().__init__(**kwargs) - self._api_key = api_key -``` - ---- - -## Notes - -- Non-breaking improvements only -- Backward compatibility preserved -- Conservative performance changes -- Google-style docstrings -- Pattern checks follow recent Pipecat code diff --git a/.claude/skills/code-review/SKILL.md b/.claude/skills/code-review/SKILL.md deleted file mode 100644 index 036a7f935..000000000 --- a/.claude/skills/code-review/SKILL.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: code-review -description: Automated code review for pull requests using multiple specialized agents -disable-model-invocation: true -allowed-tools: Bash(gh issue view:*), Bash(gh search:*), Bash(gh issue list:*), Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*), Bash(gh pr list:*) ---- - -Provide a code review for the given pull request. - -**Agent assumptions (applies to all agents and subagents):** - -- All tools are functional and will work without error. Do not test tools or make exploratory calls. Make sure this is clear to every subagent that is launched. -- Only call a tool if it is required to complete the task. Every tool call should have a clear purpose. - -To do this, follow these steps precisely: - -1. Launch a haiku agent to check if any of the following are true: - - The pull request is closed - - The pull request is a draft - - The pull request does not need code review (e.g. automated PR, trivial change that is obviously correct) - - Claude has already commented on this PR (check `gh pr view --comments` for comments left by claude) - - If any condition is true, stop and do not proceed. - -Note: Still review Claude generated PR's. - -2. Launch a haiku agent to return a list of file paths (not their contents) for all relevant CLAUDE.md files including: - - The root CLAUDE.md file, if it exists - - Any CLAUDE.md files in directories containing files modified by the pull request - -3. 
Launch a sonnet agent to view the pull request and return a summary of the changes - -4. Launch 4 agents in parallel to independently review the changes. Each agent should return the list of issues, where each issue includes a description and the reason it was flagged (e.g. "CLAUDE.md adherence", "bug"). The agents should do the following: - - Agents 1 + 2: CLAUDE.md compliance sonnet agents - Audit changes for CLAUDE.md compliance in parallel. Note: When evaluating CLAUDE.md compliance for a file, you should only consider CLAUDE.md files that share a file path with the file or parents. - - Agent 3: Opus bug agent (parallel subagent with agent 4) - Scan for obvious bugs. Focus only on the diff itself without reading extra context. Flag only significant bugs; ignore nitpicks and likely false positives. Do not flag issues that you cannot validate without looking at context outside of the git diff. - - Agent 4: Opus bug agent (parallel subagent with agent 3) - Look for problems that exist in the introduced code. This could be security issues, incorrect logic, etc. Only look for issues that fall within the changed code. - - **CRITICAL: We only want HIGH SIGNAL issues.** Flag issues where: - - The code will fail to compile or parse (syntax errors, type errors, missing imports, unresolved references) - - The code will definitely produce wrong results regardless of inputs (clear logic errors) - - Clear, unambiguous CLAUDE.md violations where you can quote the exact rule being broken - - Do NOT flag: - - Code style or quality concerns - - Potential issues that depend on specific inputs or state - - Subjective suggestions or improvements - - If you are not certain an issue is real, do not flag it. False positives erode trust and waste reviewer time. - - In addition to the above, each subagent should be told the PR title and description. This will help provide context regarding the author's intent. - -5. 
For each issue found in the previous step by agents 3 and 4, launch parallel subagents to validate the issue. These subagents should get the PR title and description along with a description of the issue. The agent's job is to review the issue to validate that the stated issue is truly an issue with high confidence. For example, if an issue such as "variable is not defined" was flagged, the subagent's job would be to validate that is actually true in the code. Another example would be CLAUDE.md issues. The agent should validate that the CLAUDE.md rule that was violated is scoped for this file and is actually violated. Use Opus subagents for bugs and logic issues, and sonnet agents for CLAUDE.md violations. - -6. Filter out any issues that were not validated in step 5. This step will give us our list of high signal issues for our review. - -7. If issues were found, skip to step 8 to post comments. - - If NO issues were found, post a summary comment using `gh pr comment` (if `--comment` argument is provided): - "No issues found. Checked for bugs and CLAUDE.md compliance." - -8. Create a list of all comments that you plan on leaving. This is only for you to make sure you are comfortable with the comments. Do not post this list anywhere. - -9. Post inline comments for each issue using `gh pr review` with inline comments. For each comment: - - Provide a brief description of the issue - - For small, self-contained fixes, include a committable suggestion block - - For larger fixes (6+ lines, structural changes, or changes spanning multiple locations), describe the issue and suggested fix without a suggestion block - - Never post a committable suggestion UNLESS committing the suggestion fixes the issue entirely. If follow up steps are required, do not leave a committable suggestion. - - **IMPORTANT: Only post ONE comment per unique issue. 
Do not post duplicate comments.** - -Use this list when evaluating issues in Steps 4 and 5 (these are false positives, do NOT flag): - -- Pre-existing issues -- Something that appears to be a bug but is actually correct -- Pedantic nitpicks that a senior engineer would not flag -- Issues that a linter will catch (do not run the linter to verify) -- General code quality concerns (e.g., lack of test coverage, general security issues) unless explicitly required in CLAUDE.md -- Issues mentioned in CLAUDE.md but explicitly silenced in the code (e.g., via a lint ignore comment) - -Notes: - -- Use gh CLI to interact with GitHub (e.g., fetch pull requests, create comments). Do not use web fetch. -- Create a todo list before starting. -- You must cite and link each issue in inline comments (e.g., if referring to a CLAUDE.md, include a link to it). -- If no issues are found, post a comment with the following format: - ---- - -## Code review - -No issues found. Checked for bugs and CLAUDE.md compliance. - ---- - -- When linking to code in inline comments, follow the following format precisely, otherwise the Markdown preview won't render correctly: `https://github.com/OWNER/REPO/blob/FULL_SHA/path/to/file.py#L10-L15` - - Requires full git sha - - You must provide the full sha. Commands like `https://github.com/owner/repo/blob/$(git rev-parse HEAD)/foo/bar` will not work, since your comment will be directly rendered in Markdown. - - Repo name must match the repo you're code reviewing - - # sign after the file name - - Line range format is L[start]-L[end] - - Provide at least 1 line of context before and after, centered on the line you are commenting about (eg. 
if you are commenting about lines 5-6, you should link to `L4-7`) diff --git a/.claude/skills/docstring/SKILL.md b/.claude/skills/docstring/SKILL.md deleted file mode 100644 index 1c1e3c905..000000000 --- a/.claude/skills/docstring/SKILL.md +++ /dev/null @@ -1,257 +0,0 @@ ---- -name: docstring -description: Document a Python module and its classes using Google style ---- - -Document a Python module and its classes using Google-style docstrings following project conventions. The class name is provided as an argument. - -## Instructions - -1. First, find the class in the codebase: - ``` - Search for "class ClassName" in src/pipecat/ - ``` - -2. If multiple files contain that class name: - - List all matches with their file paths - - Ask the user which one they want to document - - Wait for confirmation before proceeding - -3. Once the file is identified, read the module to understand its structure: - - Identify all classes, functions, and important type aliases - - Understand the purpose of each component - -4. Apply documentation in this order: - - Module docstring (at top, after imports) - - Class docstrings - - `__init__` methods (always document constructor parameters) - - Public methods (not starting with `_`) - - Dataclass/config classes with field descriptions - -5. Skip documentation for: - - Private methods (starting with `_`) - - Simple dunder methods (`__str__`, `__repr__`, `__post_init__`) - - Very simple pass-through properties - - **Already documented code** - If a class, method, or function already has a complete docstring that follows the project style, do not modify it. A docstring is complete if it has: - - A one-line summary - - Args section (if it has parameters) - - Returns section (if it returns something meaningful) - - Only add or improve documentation where it is missing or incomplete - -## Module Docstring Format - -```python -"""[One-line description of module purpose]. 
- -[Optional: Longer explanation of functionality, key classes, or use cases.] -""" -``` - -Example: -```python -"""Neuphonic text-to-speech service implementations. - -This module provides WebSocket and HTTP-based integrations with Neuphonic's -text-to-speech API for real-time audio synthesis. -""" -``` - -## Class Docstring Format - -```python -class ClassName: - """One-line summary describing what the class does. - - [Longer description explaining purpose, behavior, and key features. - Use action-oriented language.] - - [Optional: Event handlers, usage notes, or important caveats.] - """ -``` - -Example: -```python -class FrameProcessor(BaseObject): - """Base class for all frame processors in the pipeline. - - Frame processors are the building blocks of Pipecat pipelines, they can be - linked to form complex processing pipelines. They receive frames, process - them, and pass them to the next or previous processor in the chain. - - Event handlers available: - - - on_before_process_frame: Called before a frame is processed - - on_after_process_frame: Called after a frame is processed - - Example:: - - @processor.event_handler("on_before_process_frame") - async def on_before_process_frame(processor, frame): - ... - - @processor.event_handler("on_after_process_frame") - async def on_after_process_frame(processor, frame): - ... - """ -``` - -Note: When listing event handlers, do NOT use backticks. Include an `Example::` section (with double colon for Sphinx) showing the decorator pattern and function signature for each event. - -## Constructor (`__init__`) Format - -```python -def __init__(self, *, param1: Type, param2: Type = default, **kwargs): - """Initialize the [ClassName]. - - Args: - param1: Description of param1 and its purpose. - param2: Description of param2. Defaults to [default]. - **kwargs: Additional arguments passed to parent class. 
- """ -``` - -Example: -```python -def __init__( - self, - *, - api_key: str, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = 22050, - **kwargs, -): - """Initialize the Neuphonic TTS service. - - Args: - api_key: Neuphonic API key for authentication. - voice_id: ID of the voice to use for synthesis. - sample_rate: Audio sample rate in Hz. Defaults to 22050. - **kwargs: Additional arguments passed to parent InterruptibleTTSService. - """ -``` - -## Method Docstring Format - -```python -async def method_name(self, param1: Type) -> ReturnType: - """One-line summary of what method does. - - [Longer description if behavior isn't obvious.] - - Args: - param1: Description of param1. - - Returns: - Description of return value. - - Raises: - ExceptionType: When this exception is raised. - """ -``` - -Example: -```python -async def put(self, item: Tuple[Frame, FrameDirection, FrameCallback]): - """Put an item into the priority queue. - - System frames (`SystemFrame`) have higher priority than any other - frames. If a non-frame item is provided it will have the highest priority. - - Args: - item: The item to enqueue. - """ -``` - -## Dataclass/Config Format - -```python -@dataclass -class ConfigName: - """One-line description of configuration. - - [Explanation of when/how to use this config.] - - Parameters: - field1: Description of field1. - field2: Description of field2. Defaults to [default]. - """ - - field1: Type - field2: Type = default_value -``` - -Example: -```python -@dataclass -class FrameProcessorSetup: - """Configuration parameters for frame processor initialization. - - Parameters: - clock: The clock instance for timing operations. - task_manager: The task manager for handling async operations. - observer: Optional observer for monitoring frame processing events. 
- """ - - clock: BaseClock - task_manager: BaseTaskManager - observer: Optional[BaseObserver] = None -``` - -## Enum Documentation Format - -```python -class EnumName(Enum): - """One-line description of the enum purpose. - - [Longer description of how the enum is used.] - - Parameters: - VALUE1: Description of VALUE1. - VALUE2: Description of VALUE2. - """ - - VALUE1 = 1 - VALUE2 = 2 -``` - -## Writing Style Guidelines - -- **Concise and professional** - No casual language or filler words -- **Action-oriented** - Start with verbs: "Processes...", "Manages...", "Converts..." -- **Purpose before implementation** - Explain WHY before HOW -- **Clear parameter descriptions** - Include type hints, defaults, and purpose -- **No redundant type info** - Type hints are in the signature, don't repeat in description -- **Use backticks for code references** - Wrap class names, method names, event names, parameter names, and code snippets in backticks - -Good: "Neuphonic API key for authentication." -Bad: "str: The API key (string) that is used for authenticating with Neuphonic." - -Good: "Triggers `on_speech_started` when the `VADAnalyzer` detects speech." -Bad: "Triggers on_speech_started when the VADAnalyzer detects speech." - -## Deprecation Notice Format - -When documenting deprecated code: - -```python -"""[Description]. - -.. deprecated:: X.X.X - `ClassName` is deprecated and will be removed in a future version. - Use `NewClassName` instead. 
-""" -``` - -## Checklist - -Before finishing, verify: - -- [ ] Module has a docstring at the top (after copyright header and imports) -- [ ] All public classes have docstrings -- [ ] All `__init__` methods document their parameters -- [ ] All public methods have docstrings with Args/Returns/Raises as needed -- [ ] Dataclasses use "Parameters:" section for field descriptions -- [ ] Enums document each value in "Parameters:" section -- [ ] Writing is concise and action-oriented -- [ ] No documentation added to private methods (starting with `_`) -- [ ] Existing complete docstrings were left unchanged diff --git a/.claude/skills/pr-description/SKILL.md b/.claude/skills/pr-description/SKILL.md deleted file mode 100644 index 666cf2bd1..000000000 --- a/.claude/skills/pr-description/SKILL.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -name: pr-description -description: Update a GitHub PR description with a summary of changes ---- - -Update a GitHub pull request description based on the changes in the PR. - -## Arguments - -``` -/pr-description [--fixes ] -``` - -- `PR_NUMBER` (required): The pull request number to update -- `--fixes` (optional): Comma-separated issue numbers that this PR fixes (e.g., `--fixes 123,456`) - -Examples: -- `/pr-description 3534` -- `/pr-description 3534 --fixes 123` -- `/pr-description 3534 --fixes 123,456,789` - -## Instructions - -1. First, gather information about the PR: - - Use GitHub plugin to get PR details (title, current description, base branch) - - Use local git to get commits: `git log main..HEAD --oneline` - - Use local git to get the diff: `git diff main..HEAD` - - Parse any `--fixes` argument for issue numbers - -2. Check the existing PR description: - - If it already has a complete, accurate description that reflects the changes, do nothing - - If it's missing sections, incomplete, or outdated compared to the actual changes, proceed to update - - If it only has the template placeholder text, generate a full description - -3. 
Analyze the changes: - - Understand the purpose of each commit - - Identify any breaking changes (API changes, removed features, behavior changes) - - Look for new features, bug fixes, refactoring, or documentation changes - - Collect issue numbers from: - - The `--fixes` argument (if provided) - - Commit messages (patterns like "Fixes #123", "Closes #456", "Resolves #789") - -4. Generate or update the PR description with these sections: - -## PR Description Format - -### Summary (always include) - -Brief bullet points describing what changed and why. Focus on the *purpose* and *impact*, not implementation details. - -```markdown -## Summary - -- Added X to enable Y -- Fixed bug where Z would happen -- Refactored W for better maintainability -``` - -### Breaking Changes (include only if applicable) - -Document any changes that affect existing users or APIs. - -```markdown -## Breaking Changes - -- `ClassName.method()` now requires a `param` argument -- Removed deprecated `old_function()` - use `new_function()` instead -``` - -### Testing (include when non-obvious) - -How to verify the changes work. Skip for trivial changes. - -```markdown -## Testing - -- Run `uv run pytest tests/test_feature.py` to verify the fix -- Example usage: `uv run examples/new_feature.py` -``` - -### Fixes (include if issues are provided or found in commits) - -List issues this PR fixes. GitHub will automatically close these issues when the PR is merged. - -```markdown -## Fixes - -- Fixes #123 -- Fixes #456 -``` - -Note: Use "Fixes #X" format (not "Closes" or "Resolves") for consistency. Each issue should be on its own line with "Fixes" to ensure GitHub auto-closes them. 
- -## Guidelines - -- **Be concise** - Reviewers should understand the PR in 30 seconds -- **Focus on why** - The diff shows *what* changed, explain *why* -- **Skip empty sections** - Only include sections that have content -- **Use bullet points** - Easier to scan than paragraphs -- **Don't duplicate the diff** - Avoid listing every file or line changed - -## Example Output - -```markdown -## Summary - -- Added `/docstring` skill for documenting Python modules with Google-style docstrings -- Skill finds classes by name and handles conflicts when multiple matches exist -- Skips already-documented code to avoid unnecessary changes - -## Testing - -/docstring ClassName - -## Fixes - -- Fixes #123 -``` - -## Checklist - -Before updating the PR: - -- [ ] Verified existing description needs updating (not already complete) -- [ ] Summary accurately reflects the changes -- [ ] Breaking changes are clearly documented (if any) -- [ ] No unnecessary sections included -- [ ] Description is concise and scannable diff --git a/.claude/skills/pr-submit/SKILL.md b/.claude/skills/pr-submit/SKILL.md deleted file mode 100644 index 5724ddb6e..000000000 --- a/.claude/skills/pr-submit/SKILL.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -name: pr-submit -description: Create and submit a GitHub PR from the current branch ---- - -Submit the current changes as a GitHub pull request. - -## Instructions - -1. Check the current state of the repository: - - Run `git status` to see staged, unstaged, and untracked changes - - Run `git diff` to see current changes - - Run `git log --oneline -10` to see recent commits - -2. If there are uncommitted changes relevant to the PR: - - Ask the user if they want a specific prefix for the branch name (e.g., `alice/`, `fix/`, `feat/`) - - Create a new branch based on the current branch - - Commit the changes using multiple commits if the changes are unrelated - -3. 
Push the branch and create the PR: - - Push with `-u` flag to set upstream tracking - - Create the PR using `gh pr create` - -4. After the PR is created: - - Run `/changelog <PR_NUMBER>` to generate changelog files, then commit and push them - - Run `/pr-description <PR_NUMBER>` to update the PR description - -5. Return the PR URL to the user. diff --git a/README.md b/README.md index 6d6a56612..058f23128 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,22 @@ Looking for help debugging your pipeline and processors? Check out [Whisker](htt Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat. +### 🤖 Claude Code Skills + +Use [Pipecat Skills](https://github.com/pipecat-ai/skills) with [Claude Code](https://claude.ai/code) to scaffold projects, generate changelogs, deploy to Pipecat Cloud, and more. Install the marketplace with: + +``` +claude plugin marketplace add pipecat-ai/skills +``` + +And install the plugins, for example: + +``` +claude plugin install pipecat-dev@pipecat-skills +``` + +There's more! + ### 📺️ Pipecat TV Channel Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel. From b6f21ab15da08a6577a6ebbcf401346229273e01 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 17:07:20 -0500 Subject: [PATCH 087/189] =?UTF-8?q?Make=20`ServiceUpdateSettingsFrame`=20u?= =?UTF-8?q?ninterruptible=E2=80=94settings=20updates=20are=20generally=20i?= =?UTF-8?q?ndependent=20of=20specific=20utterances.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change, settings updates were often not applied. For example, a `TTSUpdateSettingsFrame` queued while the bot was speaking would only have an effect at the end of the bot's reply, and any interruption before the end of the reply would "cancel" the update. 
--- changelog/3819.changed.md | 4 ++++ src/pipecat/frames/frames.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog/3819.changed.md diff --git a/changelog/3819.changed.md b/changelog/3819.changed.md new file mode 100644 index 000000000..7b43c399c --- /dev/null +++ b/changelog/3819.changed.md @@ -0,0 +1,4 @@ +- `ServiceUpdateSettingsFrame`s are now `UninterruptibleFrame`s. Generally speaking, you don't want a user interruption to prevent a service setting change from going into effect. Note that you usually don't use `ServiceUpdateSettingsFrame` directly; you use one of its subclasses: + - `LLMUpdateSettingsFrame` + - `TTSUpdateSettingsFrame` + - `STTUpdateSettingsFrame` diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index c69ddc931..d359bcfb1 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2118,7 +2118,7 @@ class TTSStoppedFrame(ControlFrame): @dataclass -class ServiceUpdateSettingsFrame(ControlFrame): +class ServiceUpdateSettingsFrame(ControlFrame, UninterruptibleFrame): """Base frame for updating service settings. Supports both a ``settings`` dict (for backward compatibility) and a From d91c230b8559c2a7d53dcc46a7163e331c554f77 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 18:05:27 -0500 Subject: [PATCH 088/189] Fix breakage when using a generic settings update (e.g. a `TTSSettings`) instead of a more specific one (e.g. a `RimeTTSSettings`). Both should work, assuming you're only changing fields present in the generic settings. 
--- src/pipecat/services/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 7de476c64..641cc23f5 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -221,7 +221,7 @@ class ServiceSettings: for f in fields(self): if f.name == "extra": continue - new_val = getattr(delta, f.name) + new_val = getattr(delta, f.name, NOT_GIVEN) if not is_given(new_val): continue old_val = getattr(self, f.name) From d918a20b759bc280fc14a1f2b2897c5936d14966 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 18:14:16 -0500 Subject: [PATCH 089/189] Fix missing field warning in `RimeTTSService` --- src/pipecat/services/rime/tts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 6c3ff4e23..248c84008 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -235,6 +235,7 @@ class RimeTTSService(AudioContextTTSService): if params.language else None, segment=params.segment, + inlineSpeedAlpha=None, # Not applicable here # Arcana params repetition_penalty=params.repetition_penalty, temperature=params.temperature, From f421ad9cf672c08ce2bdede5b96824273033b20c Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 18:57:38 -0500 Subject: [PATCH 090/189] Fix STT TTFB timeout measuring to timeout expiry instead of transcript time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #3776 replaced manual timestamp tracking with stop_ttfb_metrics() in the timeout handler, but without an end_time it uses time.time() at timeout expiry—producing TTFB = timeout + stop_secs (~2.2s) instead of the actual transcript latency. Restore _last_transcript_time tracking so the timeout handler measures to when the transcript arrived, and skip reporting if none arrived. 
--- src/pipecat/services/stt_service.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index eedc8b46d..20e8cacc9 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -127,6 +127,7 @@ class STTService(AIService): self._user_speaking: bool = False self._finalize_pending: bool = False self._finalize_requested: bool = False + self._last_transcript_time: float = 0 # Keepalive state self._keepalive_timeout = keepalive_timeout @@ -385,6 +386,9 @@ class STTService(AIService): direction: The direction to push the frame. """ if isinstance(frame, TranscriptionFrame): + # Store the transcript time for TTFB calculation + self._last_transcript_time = time.time() + # Set finalized from pending state and auto-reset if self._finalize_pending: frame.finalized = True @@ -438,6 +442,7 @@ class STTService(AIService): self._user_speaking = True self._finalize_requested = False self._finalize_pending = False + self._last_transcript_time = 0 async def _handle_vad_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): """Handle VAD user stopped speaking frame. @@ -467,14 +472,17 @@ class STTService(AIService): ) async def _ttfb_timeout_handler(self): - """Wait for timeout then report TTFB. + """Wait for timeout then report TTFB using the last transcript timestamp. This timeout allows the final transcription to arrive before we calculate - and report TTFB. If no transcription arrived, no TTFB is reported. + and report TTFB. Uses _last_transcript_time as the end time so we measure + to when the transcript actually arrived, not when the timeout fired. + If no transcription arrived, no TTFB is reported. 
""" try: await asyncio.sleep(self._stt_ttfb_timeout) - await self.stop_ttfb_metrics() + if self._last_transcript_time > 0: + await self.stop_ttfb_metrics(end_time=self._last_transcript_time) except asyncio.CancelledError: # Task was cancelled (new utterance or interruption), which is expected behavior pass From f928206b3ade9bf9531245bcd38b6f2d9c17c8fc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 19:02:40 -0500 Subject: [PATCH 091/189] Add changelog for STT TTFB timeout fix --- changelog/3822.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3822.fixed.md diff --git a/changelog/3822.fixed.md b/changelog/3822.fixed.md new file mode 100644 index 000000000..48218845f --- /dev/null +++ b/changelog/3822.fixed.md @@ -0,0 +1 @@ +- Fixed STT TTFB metrics measuring timeout expiry time instead of actual transcript arrival time. \ No newline at end of file From 69d916ca519df7aa4e83aa584914cb97836999cb Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 20:51:41 -0500 Subject: [PATCH 092/189] Consume InterimTranscriptionFrame and TranslationFrame in LLMUserAggregator These frames were falling through to the else branch and being pushed downstream, unlike TranscriptionFrame which is explicitly consumed. This aligns with how the assistant aggregator already filters them. 
--- .../aggregators/llm_response_universal.py | 4 ++ tests/test_context_aggregators_universal.py | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index e5884a868..4a28b38d5 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -461,6 +461,10 @@ class LLMUserAggregator(LLMContextAggregator): await self.push_frame(frame, direction) elif isinstance(frame, TranscriptionFrame): await self._handle_transcription(frame) + elif isinstance(frame, (InterimTranscriptionFrame, TranslationFrame)): + # Interim transcriptions and translations are consumed here + # and not pushed downstream, same as final TranscriptionFrame. + pass elif isinstance(frame, LLMRunFrame): await self._handle_llm_run(frame) elif isinstance(frame, LLMMessagesAppendFrame): diff --git a/tests/test_context_aggregators_universal.py b/tests/test_context_aggregators_universal.py index 1bba463b0..e86905e1c 100644 --- a/tests/test_context_aggregators_universal.py +++ b/tests/test_context_aggregators_universal.py @@ -12,6 +12,7 @@ from pipecat.frames.frames import ( FunctionCallFromLLM, FunctionCallResultFrame, FunctionCallsStartedFrame, + InterimTranscriptionFrame, InterruptionFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, @@ -26,6 +27,7 @@ from pipecat.frames.frames import ( LLMThoughtTextFrame, StartFrame, TranscriptionFrame, + TranslationFrame, UserMuteStartedFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, @@ -428,6 +430,44 @@ class TestLLMUserAggregator(unittest.IsolatedAsyncioTestCase): ignore_start=False, ) + async def test_interim_transcription_not_pushed_downstream(self): + """InterimTranscriptionFrame should be consumed and not pushed downstream.""" + context = LLMContext() + pipeline = Pipeline([LLMUserAggregator(context)]) + + frames_to_send = [ + 
InterimTranscriptionFrame(text="Hel", user_id="", timestamp="now"), + InterimTranscriptionFrame(text="Hello", user_id="", timestamp="now"), + ] + # The interim transcription triggers a user turn start via the default + # TranscriptionUserTurnStartStrategy, so we expect turn-related frames + # but NOT the InterimTranscriptionFrame itself. + expected_down_frames = [ + UserStartedSpeakingFrame, + InterruptionFrame, + ] + (down_frames, _) = await run_test( + pipeline, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + self.assertFalse(any(isinstance(f, InterimTranscriptionFrame) for f in down_frames)) + + async def test_translation_not_pushed_downstream(self): + """TranslationFrame should be consumed and not pushed downstream.""" + context = LLMContext() + pipeline = Pipeline([LLMUserAggregator(context)]) + + frames_to_send = [ + TranslationFrame(text="Hola!", user_id="", timestamp="now", language="es"), + ] + # No downstream frames expected — translations are consumed. + await run_test( + pipeline, + frames_to_send=frames_to_send, + expected_down_frames=[], + ) + class TestLLMAssistantAggregator(unittest.IsolatedAsyncioTestCase): async def test_empty(self): From 167e68672b33c4903601f45d6eb3eed528ec2cd6 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 20:52:16 -0500 Subject: [PATCH 093/189] Add changelog for InterimTranscriptionFrame/TranslationFrame fix --- changelog/3825.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3825.fixed.md diff --git a/changelog/3825.fixed.md b/changelog/3825.fixed.md new file mode 100644 index 000000000..7cd9ba508 --- /dev/null +++ b/changelog/3825.fixed.md @@ -0,0 +1 @@ +- Fixed `InterimTranscriptionFrame` and `TranslationFrame` being unintentionally pushed downstream in `LLMUserAggregator`. They are now consumed like `TranscriptionFrame`. 
From a84930dc3eadb28b408ef61db206f0e48b41fd88 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 23:21:52 -0500 Subject: [PATCH 094/189] Skip empty audio frames after filter buffering Audio filters like RNNoise, KrispViva, and AIC return empty bytes while buffering audio to accumulate their required frame size. These empty frames were flowing downstream, causing misleading "Empty audio frame received for STT service" warnings. Skip the frame in BaseInputTransport when audio is empty, preventing unnecessary processing in VAD and downstream processors. Fixes #3517 --- changelog/3828.fixed.md | 1 + src/pipecat/transports/base_input.py | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog/3828.fixed.md diff --git a/changelog/3828.fixed.md b/changelog/3828.fixed.md new file mode 100644 index 000000000..dd2ee257d --- /dev/null +++ b/changelog/3828.fixed.md @@ -0,0 +1 @@ +- Fixed misleading "Empty audio frame received for STT service" warnings when using audio filters (e.g. `RNNoiseFilter`, `KrispVivaFilter`, `AICFilter`) that buffer audio internally. diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 77ff61bba..49c28149a 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -424,6 +424,11 @@ class BaseInputTransport(FrameProcessor): if self._params.audio_in_filter: frame.audio = await self._params.audio_in_filter.filter(frame.audio) + # Skip frames with no audio data (e.g. filter is buffering). + if not frame.audio: + self._audio_in_queue.task_done() + continue + ################################################################### # DEPRECATED. 
# From 68e19a730b4d17ddb515700314a68c3f2a811503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 24 Feb 2026 23:47:06 -0800 Subject: [PATCH 095/189] Restore dev skills and add marketplace for maintainer workflows Brings back the 6 development workflow skills (changelog, cleanup, code-review, docstring, pr-description, pr-submit) that were moved to pipecat-ai/skills, and adds a .claude-plugin/marketplace.json so other pipecat-ai repos can install them. Updates README contributing section with installation instructions. --- .claude-plugin/marketplace.json | 26 +++ .claude/skills/changelog/SKILL.md | 47 ++++ .claude/skills/cleanup/SKILL.md | 307 +++++++++++++++++++++++++ .claude/skills/code-review/SKILL.md | 107 +++++++++ .claude/skills/docstring/SKILL.md | 256 +++++++++++++++++++++ .claude/skills/pr-description/SKILL.md | 128 +++++++++++ .claude/skills/pr-submit/SKILL.md | 28 +++ README.md | 19 +- 8 files changed, 910 insertions(+), 8 deletions(-) create mode 100644 .claude-plugin/marketplace.json create mode 100644 .claude/skills/changelog/SKILL.md create mode 100644 .claude/skills/cleanup/SKILL.md create mode 100644 .claude/skills/code-review/SKILL.md create mode 100644 .claude/skills/docstring/SKILL.md create mode 100644 .claude/skills/pr-description/SKILL.md create mode 100644 .claude/skills/pr-submit/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 000000000..64aac9338 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,26 @@ +{ + "name": "pipecat-dev-skills", + "owner": { + "name": "Pipecat" + }, + "metadata": { + "description": "Development workflow skills for contributing to the Pipecat project", + "version": "1.0.0" + }, + "plugins": [ + { + "name": "pipecat-dev", + "description": "Development workflow skills for contributing to the Pipecat project", + "version": "1.0.0", + "source": "./", + "skills": [ + "./.claude/skills/changelog", + 
"./.claude/skills/cleanup", + "./.claude/skills/code-review", + "./.claude/skills/docstring", + "./.claude/skills/pr-description", + "./.claude/skills/pr-submit" + ] + } + ] +} diff --git a/.claude/skills/changelog/SKILL.md b/.claude/skills/changelog/SKILL.md new file mode 100644 index 000000000..1ef8f324e --- /dev/null +++ b/.claude/skills/changelog/SKILL.md @@ -0,0 +1,47 @@ +--- +name: changelog +description: Create changelog files for important commits in a PR +--- + +Create changelog files for the important commits in this PR. The PR number is provided as an argument. + +## Instructions + +1. Skip changelog for: documentation-only, internal refactoring, test-only, CI changes. + +2. First, check what commits are on the current branch compared to main: + ``` + git log main..HEAD --oneline + ``` + +3. For each significant change, create a changelog file in the `changelog/` folder using the format: + Allowed types: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`, `performance`, `other` + - `{PR_NUMBER}.added.md` - for new features + - `{PR_NUMBER}.added.2.md`, `{PR_NUMBER}.added.3.md` - for additional entries of the same type + - `{PR_NUMBER}.changed.md` - for changes to existing functionality + - `{PR_NUMBER}.fixed.md` - for bug fixes + - `{PR_NUMBER}.deprecated.md` - for deprecations + - `{PR_NUMBER}.removed.md` - for removed features + - `{PR_NUMBER}.security.md` - for security fixes + - `{PR_NUMBER}.performance.md` - for performance improvements + - `{PR_NUMBER}.other.md` - for other changes + +4. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change. No line wrapping. + +5. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples. + +6. Use ⚠️ emoji prefix for breaking changes. 
+ +## Example + +For PR #3519 with a new feature and a bug fix: + +`changelog/3519.added.md`: +``` +- Added `SomeNewFeature` for doing something useful. +``` + +`changelog/3519.fixed.md`: +``` +- Fixed an issue where something was not working correctly. +``` diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md new file mode 100644 index 000000000..91a61db39 --- /dev/null +++ b/.claude/skills/cleanup/SKILL.md @@ -0,0 +1,307 @@ +# Code Cleanup Skill + +The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat's architecture, coding standards, and example patterns**. +It focuses on **readability, correctness, performance, and consistency**, while avoiding breaking changes. + +--- + +## Skill Overview + +This skill analyzes all changes introduced in your branch and performs the following actions: + +1. **Analyze Branch Changes** + - Review uncommitted changes and outgoing commits +2. **Refactor for Readability** + - Improve clarity, naming, structure, and modern Python usage +3. **Enhance Performance** + - Identify safe, conservative optimization opportunities +4. **Add Documentation** + - Apply Pipecat-style, Google-format docstrings +5. **Ensure Pattern Consistency** + - Match existing Pipecat services, pipelines, and examples +6. **Validate Examples** + - Ensure examples follow foundational patterns (e.g. `07-interruptible.py`) + +--- + +## Usage + +Invoke the skill using any of the following commands: + +- "Clean up my branch code" +- "Refactor the changes in my branch" +- "Review and improve my branch code" +- `/cleanup` + +--- + +## What This Skill Does + +### 1. Analyze Branch Changes + +The skill retrieves all uncommitted changes and outgoing commits to understand: + +- New files added +- Modified files +- Code additions and deletions +- Overall scope and intent of changes + +--- + +### 2. 
Code Refactoring + +#### Readability Improvements + +- Replace tuples with named classes or dataclasses +- Improve variable, method, and class naming +- Extract complex logic into well-named helper methods +- Add missing type hints +- Simplify nested or complex conditionals +- Replace deprecated methods and features +- Normalize formatting to match Pipecat style + +#### Performance Enhancements + +- Identify inefficient loops or repeated work +- Suggest appropriate data structures +- Optimize async workflows and I/O +- Remove redundant operations + +> Performance changes are conservative and non-breaking. + +--- + +### 3. Documentation + +Documentation follows **Google-style docstrings**, consistent with Pipecat conventions. + +#### Class Documentation + +```python +class ExampleService: + """Brief one-line description. + + Detailed explanation of the class purpose, responsibilities, + and important behaviors. + + Supported features: + + - Feature 1 + - Feature 2 + - Feature 3 + """ +``` + +#### Method Documentation + +```python +def process_data(self, data: str, options: Optional[dict] = None) -> bool: + """Process incoming data with optional configuration. + + Args: + data: The input data to process. + options: Optional configuration dictionary. + + Returns: + True if processing succeeded, False otherwise. + + Raises: + ValueError: If data is empty or invalid. + """ +``` + +#### Pydantic Model Parameters + +```python +class InputParams(BaseModel): + """Configuration parameters for the service. + + Parameters: + timeout: Request timeout in seconds. + retry_count: Number of retry attempts. + enable_logging: Whether to enable debug logging. + """ + + timeout: Optional[float] = None + retry_count: int = 3 + enable_logging: bool = False +``` + +--- + +### 4. 
Pattern Consistency Checks + +#### Service Classes + +- Correct inheritance (`TTSService`, `STTService`, `LLMService`) +- Consistent constructor signatures +- Frame emission patterns +- Metrics support: + - `can_generate_metrics()` + - TTFB metrics + - Usage metrics +- Alignment with similar existing services + +#### Examples + +Validated against `examples/foundational/07-interruptible.py`: + +- Proper `create_transport()` usage +- Correct pipeline structure +- Task setup and observers +- Event handler registration +- Runner and bot entrypoint consistency + +--- + +### 5. Specific Implementation Patterns + +#### Service Implementation + +```python +class ExampleTTSService(TTSService): + + def __init__(self, *, api_key: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + self._api_key = api_key or os.getenv("SERVICE_API_KEY") + + def can_generate_metrics(self) -> bool: + return True + + async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: + try: + await self.start_ttfb_metrics() + yield TTSStartedFrame() + # ... processing ... + yield TTSAudioRawFrame(...) + finally: + await self.stop_ttfb_metrics() +``` + +--- + +#### Example Structure Pattern + +```python +transport_params = { + "daily": lambda: DailyParams(...), + "twilio": lambda: FastAPIWebsocketParams(...), + "webrtc": lambda: TransportParams(...), +} + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + stt = DeepgramSTTService(...) + tts = SomeTTSService(...) + llm = OpenAILLMService(...) + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(...) 
+ + pipeline = Pipeline([...]) + task = PipelineTask(pipeline, params=..., observers=[...]) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + await task.queue_frames([LLMRunFrame()]) + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + await runner.run(task) + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) +``` + +--- + +## Execution Flow + +1. Fetch uncommitted and outgoing changes +2. Categorize files (services, examples, tests, utilities) +3. Analyze each file: + - Readability + - Performance + - Documentation + - Pattern consistency +4. Generate actionable recommendations +5. Apply Pipecat standards + +--- + +## Examples + +### Before: Tuple Usage + +```python +def get_audio_info(self) -> Tuple[int, int]: + return (48000, 1) +``` + +### After: Named Class + +```python +class AudioInfo: + """Audio configuration information. + + Parameters: + sample_rate: Sample rate in Hz. + num_channels: Number of audio channels. + """ + + sample_rate: int + num_channels: int + +def get_audio_info(self) -> AudioInfo: + return AudioInfo(sample_rate=48000, num_channels=1) +``` + +--- + +### Before: Missing Documentation + +```python +class NewTTSService(TTSService): + def __init__(self, api_key: str, voice: str): + self._api_key = api_key + self._voice = voice +``` + +### After: Fully Documented + +```python +class NewTTSService(TTSService): + """Text-to-speech service using NewProvider API. + + Streams PCM audio and emits TTSAudioRawFrame frames compatible + with Pipecat transports. + + Supported features: + - Text-to-speech synthesis + - Streaming PCM audio + - Voice customization + - TTFB metrics + """ + + def __init__(self, *, api_key: str, voice: str, **kwargs): + """Initialize the NewTTSService. 
+ + Args: + api_key: API key for authentication. + voice: Voice identifier to use. + **kwargs: Additional arguments passed to the parent service. + """ + super().__init__(**kwargs) + self._api_key = api_key + self.set_voice(voice) +``` + +--- + +## Notes + +- Non-breaking improvements only +- Backward compatibility preserved +- Conservative performance changes +- Google-style docstrings +- Pattern checks follow recent Pipecat code diff --git a/.claude/skills/code-review/SKILL.md b/.claude/skills/code-review/SKILL.md new file mode 100644 index 000000000..036a7f935 --- /dev/null +++ b/.claude/skills/code-review/SKILL.md @@ -0,0 +1,107 @@ +--- +name: code-review +description: Automated code review for pull requests using multiple specialized agents +disable-model-invocation: true +allowed-tools: Bash(gh issue view:*), Bash(gh search:*), Bash(gh issue list:*), Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*), Bash(gh pr list:*) +--- + +Provide a code review for the given pull request. + +**Agent assumptions (applies to all agents and subagents):** + +- All tools are functional and will work without error. Do not test tools or make exploratory calls. Make sure this is clear to every subagent that is launched. +- Only call a tool if it is required to complete the task. Every tool call should have a clear purpose. + +To do this, follow these steps precisely: + +1. Launch a haiku agent to check if any of the following are true: + - The pull request is closed + - The pull request is a draft + - The pull request does not need code review (e.g. automated PR, trivial change that is obviously correct) + - Claude has already commented on this PR (check `gh pr view --comments` for comments left by claude) + + If any condition is true, stop and do not proceed. + +Note: Still review Claude generated PR's. + +2. 
Launch a haiku agent to return a list of file paths (not their contents) for all relevant CLAUDE.md files including: + - The root CLAUDE.md file, if it exists + - Any CLAUDE.md files in directories containing files modified by the pull request + +3. Launch a sonnet agent to view the pull request and return a summary of the changes + +4. Launch 4 agents in parallel to independently review the changes. Each agent should return the list of issues, where each issue includes a description and the reason it was flagged (e.g. "CLAUDE.md adherence", "bug"). The agents should do the following: + + Agents 1 + 2: CLAUDE.md compliance sonnet agents + Audit changes for CLAUDE.md compliance in parallel. Note: When evaluating CLAUDE.md compliance for a file, you should only consider CLAUDE.md files that share a file path with the file or parents. + + Agent 3: Opus bug agent (parallel subagent with agent 4) + Scan for obvious bugs. Focus only on the diff itself without reading extra context. Flag only significant bugs; ignore nitpicks and likely false positives. Do not flag issues that you cannot validate without looking at context outside of the git diff. + + Agent 4: Opus bug agent (parallel subagent with agent 3) + Look for problems that exist in the introduced code. This could be security issues, incorrect logic, etc. Only look for issues that fall within the changed code. + + **CRITICAL: We only want HIGH SIGNAL issues.** Flag issues where: + - The code will fail to compile or parse (syntax errors, type errors, missing imports, unresolved references) + - The code will definitely produce wrong results regardless of inputs (clear logic errors) + - Clear, unambiguous CLAUDE.md violations where you can quote the exact rule being broken + + Do NOT flag: + - Code style or quality concerns + - Potential issues that depend on specific inputs or state + - Subjective suggestions or improvements + + If you are not certain an issue is real, do not flag it. 
False positives erode trust and waste reviewer time. + + In addition to the above, each subagent should be told the PR title and description. This will help provide context regarding the author's intent. + +5. For each issue found in the previous step by agents 3 and 4, launch parallel subagents to validate the issue. These subagents should get the PR title and description along with a description of the issue. The agent's job is to review the issue to validate that the stated issue is truly an issue with high confidence. For example, if an issue such as "variable is not defined" was flagged, the subagent's job would be to validate that is actually true in the code. Another example would be CLAUDE.md issues. The agent should validate that the CLAUDE.md rule that was violated is scoped for this file and is actually violated. Use Opus subagents for bugs and logic issues, and sonnet agents for CLAUDE.md violations. + +6. Filter out any issues that were not validated in step 5. This step will give us our list of high signal issues for our review. + +7. If issues were found, skip to step 8 to post comments. + + If NO issues were found, post a summary comment using `gh pr comment` (if `--comment` argument is provided): + "No issues found. Checked for bugs and CLAUDE.md compliance." + +8. Create a list of all comments that you plan on leaving. This is only for you to make sure you are comfortable with the comments. Do not post this list anywhere. + +9. Post inline comments for each issue using `gh pr review` with inline comments. For each comment: + - Provide a brief description of the issue + - For small, self-contained fixes, include a committable suggestion block + - For larger fixes (6+ lines, structural changes, or changes spanning multiple locations), describe the issue and suggested fix without a suggestion block + - Never post a committable suggestion UNLESS committing the suggestion fixes the issue entirely. 
If follow up steps are required, do not leave a committable suggestion. + + **IMPORTANT: Only post ONE comment per unique issue. Do not post duplicate comments.** + +Use this list when evaluating issues in Steps 4 and 5 (these are false positives, do NOT flag): + +- Pre-existing issues +- Something that appears to be a bug but is actually correct +- Pedantic nitpicks that a senior engineer would not flag +- Issues that a linter will catch (do not run the linter to verify) +- General code quality concerns (e.g., lack of test coverage, general security issues) unless explicitly required in CLAUDE.md +- Issues mentioned in CLAUDE.md but explicitly silenced in the code (e.g., via a lint ignore comment) + +Notes: + +- Use gh CLI to interact with GitHub (e.g., fetch pull requests, create comments). Do not use web fetch. +- Create a todo list before starting. +- You must cite and link each issue in inline comments (e.g., if referring to a CLAUDE.md, include a link to it). +- If no issues are found, post a comment with the following format: + +--- + +## Code review + +No issues found. Checked for bugs and CLAUDE.md compliance. + +--- + +- When linking to code in inline comments, follow the following format precisely, otherwise the Markdown preview won't render correctly: `https://github.com/OWNER/REPO/blob/FULL_SHA/path/to/file.py#L10-L15` + - Requires full git sha + - You must provide the full sha. Commands like `https://github.com/owner/repo/blob/$(git rev-parse HEAD)/foo/bar` will not work, since your comment will be directly rendered in Markdown. + - Repo name must match the repo you're code reviewing + - # sign after the file name + - Line range format is L[start]-L[end] + - Provide at least 1 line of context before and after, centered on the line you are commenting about (eg. 
if you are commenting about lines 5-6, you should link to `L4-L7`) diff --git a/.claude/skills/docstring/SKILL.md b/.claude/skills/docstring/SKILL.md new file mode 100644 index 000000000..129d83763 --- /dev/null +++ b/.claude/skills/docstring/SKILL.md @@ -0,0 +1,256 @@ +--- +name: docstring +description: Document a Python module and its classes using Google style +--- + +Document a Python module or class using Google-style docstrings following project conventions. The argument can be a class name or a module path. + +## Instructions + +1. Determine what to document based on the argument: + + **If a module path is provided** (e.g. `src/pipecat/audio/vad/vad_analyzer.py`): + - Use that file directly + + **If a class name is provided** (e.g. `VADAnalyzer`): + - Search for `class ClassName` in `src/pipecat/` + - If multiple files contain that class name, list all matches with their file paths, ask the user which one they want to document, and wait for confirmation + +2. Once the file is identified, read the module to understand its structure: + - Identify all classes, functions, and important type aliases + - Understand the purpose of each component + +3. Apply documentation in this order: + - Module docstring (at top, after imports) + - Class docstrings + - `__init__` methods (always document constructor parameters) + - Public methods (not starting with `_`) + - Dataclass/config classes with field descriptions + +4. Skip documentation for: + - Private methods (starting with `_`) + - Simple dunder methods (`__str__`, `__repr__`, `__post_init__`) + - Very simple pass-through properties + - **Already documented code** - If a class, method, or function already has a complete docstring that follows the project style, do not modify it. 
A docstring is complete if it has: + - A one-line summary + - Args section (if it has parameters) + - Returns section (if it returns something meaningful) + - Only add or improve documentation where it is missing or incomplete + +## Module Docstring Format + +```python +"""[One-line description of module purpose]. + +[Optional: Longer explanation of functionality, key classes, or use cases.] +""" +``` + +Example: +```python +"""Neuphonic text-to-speech service implementations. + +This module provides WebSocket and HTTP-based integrations with Neuphonic's +text-to-speech API for real-time audio synthesis. +""" +``` + +## Class Docstring Format + +```python +class ClassName: + """One-line summary describing what the class does. + + [Longer description explaining purpose, behavior, and key features. + Use action-oriented language.] + + [Optional: Event handlers, usage notes, or important caveats.] + """ +``` + +Example: +```python +class FrameProcessor(BaseObject): + """Base class for all frame processors in the pipeline. + + Frame processors are the building blocks of Pipecat pipelines, they can be + linked to form complex processing pipelines. They receive frames, process + them, and pass them to the next or previous processor in the chain. + + Event handlers available: + + - on_before_process_frame: Called before a frame is processed + - on_after_process_frame: Called after a frame is processed + + Example:: + + @processor.event_handler("on_before_process_frame") + async def on_before_process_frame(processor, frame): + ... + + @processor.event_handler("on_after_process_frame") + async def on_after_process_frame(processor, frame): + ... + """ +``` + +Note: When listing event handlers, do NOT use backticks. Include an `Example::` section (with double colon for Sphinx) showing the decorator pattern and function signature for each event. 
+ +## Constructor (`__init__`) Format + +```python +def __init__(self, *, param1: Type, param2: Type = default, **kwargs): + """Initialize the [ClassName]. + + Args: + param1: Description of param1 and its purpose. + param2: Description of param2. Defaults to [default]. + **kwargs: Additional arguments passed to parent class. + """ +``` + +Example: +```python +def __init__( + self, + *, + api_key: str, + voice_id: Optional[str] = None, + sample_rate: Optional[int] = 22050, + **kwargs, +): + """Initialize the Neuphonic TTS service. + + Args: + api_key: Neuphonic API key for authentication. + voice_id: ID of the voice to use for synthesis. + sample_rate: Audio sample rate in Hz. Defaults to 22050. + **kwargs: Additional arguments passed to parent InterruptibleTTSService. + """ +``` + +## Method Docstring Format + +```python +async def method_name(self, param1: Type) -> ReturnType: + """One-line summary of what method does. + + [Longer description if behavior isn't obvious.] + + Args: + param1: Description of param1. + + Returns: + Description of return value. + + Raises: + ExceptionType: When this exception is raised. + """ +``` + +Example: +```python +async def put(self, item: Tuple[Frame, FrameDirection, FrameCallback]): + """Put an item into the priority queue. + + System frames (`SystemFrame`) have higher priority than any other + frames. If a non-frame item is provided it will have the highest priority. + + Args: + item: The item to enqueue. + """ +``` + +## Dataclass/Config Format + +```python +@dataclass +class ConfigName: + """One-line description of configuration. + + [Explanation of when/how to use this config.] + + Parameters: + field1: Description of field1. + field2: Description of field2. Defaults to [default]. + """ + + field1: Type + field2: Type = default_value +``` + +Example: +```python +@dataclass +class FrameProcessorSetup: + """Configuration parameters for frame processor initialization. 
+ + Parameters: + clock: The clock instance for timing operations. + task_manager: The task manager for handling async operations. + observer: Optional observer for monitoring frame processing events. + """ + + clock: BaseClock + task_manager: BaseTaskManager + observer: Optional[BaseObserver] = None +``` + +## Enum Documentation Format + +```python +class EnumName(Enum): + """One-line description of the enum purpose. + + [Longer description of how the enum is used.] + + Parameters: + VALUE1: Description of VALUE1. + VALUE2: Description of VALUE2. + """ + + VALUE1 = 1 + VALUE2 = 2 +``` + +## Writing Style Guidelines + +- **Concise and professional** - No casual language or filler words +- **Action-oriented** - Start with verbs: "Processes...", "Manages...", "Converts..." +- **Purpose before implementation** - Explain WHY before HOW +- **Clear parameter descriptions** - Include type hints, defaults, and purpose +- **No redundant type info** - Type hints are in the signature, don't repeat in description +- **Use backticks for code references** - Wrap class names, method names, event names, parameter names, and code snippets in backticks + +Good: "Neuphonic API key for authentication." +Bad: "str: The API key (string) that is used for authenticating with Neuphonic." + +Good: "Triggers `on_speech_started` when the `VADAnalyzer` detects speech." +Bad: "Triggers on_speech_started when the VADAnalyzer detects speech." + +## Deprecation Notice Format + +When documenting deprecated code: + +```python +"""[Description]. + +.. deprecated:: X.X.X + `ClassName` is deprecated and will be removed in a future version. + Use `NewClassName` instead. 
+""" +``` + +## Checklist + +Before finishing, verify: + +- [ ] Module has a docstring at the top (after copyright header and imports) +- [ ] All public classes have docstrings +- [ ] All `__init__` methods document their parameters +- [ ] All public methods have docstrings with Args/Returns/Raises as needed +- [ ] Dataclasses use "Parameters:" section for field descriptions +- [ ] Enums document each value in "Parameters:" section +- [ ] Writing is concise and action-oriented +- [ ] No documentation added to private methods (starting with `_`) +- [ ] Existing complete docstrings were left unchanged diff --git a/.claude/skills/pr-description/SKILL.md b/.claude/skills/pr-description/SKILL.md new file mode 100644 index 000000000..666cf2bd1 --- /dev/null +++ b/.claude/skills/pr-description/SKILL.md @@ -0,0 +1,128 @@ +--- +name: pr-description +description: Update a GitHub PR description with a summary of changes +--- + +Update a GitHub pull request description based on the changes in the PR. + +## Arguments + +``` +/pr-description PR_NUMBER [--fixes ISSUE_NUMBERS] +``` + +- `PR_NUMBER` (required): The pull request number to update +- `--fixes` (optional): Comma-separated issue numbers that this PR fixes (e.g., `--fixes 123,456`) + +Examples: +- `/pr-description 3534` +- `/pr-description 3534 --fixes 123` +- `/pr-description 3534 --fixes 123,456,789` + +## Instructions + +1. First, gather information about the PR: + - Use GitHub plugin to get PR details (title, current description, base branch) + - Use local git to get commits: `git log main..HEAD --oneline` + - Use local git to get the diff: `git diff main..HEAD` + - Parse any `--fixes` argument for issue numbers + +2. Check the existing PR description: + - If it already has a complete, accurate description that reflects the changes, do nothing + - If it's missing sections, incomplete, or outdated compared to the actual changes, proceed to update + - If it only has the template placeholder text, generate a full description + +3. 
Analyze the changes: + - Understand the purpose of each commit + - Identify any breaking changes (API changes, removed features, behavior changes) + - Look for new features, bug fixes, refactoring, or documentation changes + - Collect issue numbers from: + - The `--fixes` argument (if provided) + - Commit messages (patterns like "Fixes #123", "Closes #456", "Resolves #789") + +4. Generate or update the PR description with these sections: + +## PR Description Format + +### Summary (always include) + +Brief bullet points describing what changed and why. Focus on the *purpose* and *impact*, not implementation details. + +```markdown +## Summary + +- Added X to enable Y +- Fixed bug where Z would happen +- Refactored W for better maintainability +``` + +### Breaking Changes (include only if applicable) + +Document any changes that affect existing users or APIs. + +```markdown +## Breaking Changes + +- `ClassName.method()` now requires a `param` argument +- Removed deprecated `old_function()` - use `new_function()` instead +``` + +### Testing (include when non-obvious) + +How to verify the changes work. Skip for trivial changes. + +```markdown +## Testing + +- Run `uv run pytest tests/test_feature.py` to verify the fix +- Example usage: `uv run examples/new_feature.py` +``` + +### Fixes (include if issues are provided or found in commits) + +List issues this PR fixes. GitHub will automatically close these issues when the PR is merged. + +```markdown +## Fixes + +- Fixes #123 +- Fixes #456 +``` + +Note: Use "Fixes #X" format (not "Closes" or "Resolves") for consistency. Each issue should be on its own line with "Fixes" to ensure GitHub auto-closes them. 
+ +## Guidelines + +- **Be concise** - Reviewers should understand the PR in 30 seconds +- **Focus on why** - The diff shows *what* changed, explain *why* +- **Skip empty sections** - Only include sections that have content +- **Use bullet points** - Easier to scan than paragraphs +- **Don't duplicate the diff** - Avoid listing every file or line changed + +## Example Output + +```markdown +## Summary + +- Added `/docstring` skill for documenting Python modules with Google-style docstrings +- Skill finds classes by name and handles conflicts when multiple matches exist +- Skips already-documented code to avoid unnecessary changes + +## Testing + +/docstring ClassName + +## Fixes + +- Fixes #123 +``` + +## Checklist + +Before updating the PR: + +- [ ] Verified existing description needs updating (not already complete) +- [ ] Summary accurately reflects the changes +- [ ] Breaking changes are clearly documented (if any) +- [ ] No unnecessary sections included +- [ ] Description is concise and scannable diff --git a/.claude/skills/pr-submit/SKILL.md b/.claude/skills/pr-submit/SKILL.md new file mode 100644 index 000000000..5724ddb6e --- /dev/null +++ b/.claude/skills/pr-submit/SKILL.md @@ -0,0 +1,28 @@ +--- +name: pr-submit +description: Create and submit a GitHub PR from the current branch +--- + +Submit the current changes as a GitHub pull request. + +## Instructions + +1. Check the current state of the repository: + - Run `git status` to see staged, unstaged, and untracked changes + - Run `git diff` to see current changes + - Run `git log --oneline -10` to see recent commits + +2. If there are uncommitted changes relevant to the PR: + - Ask the user if they want a specific prefix for the branch name (e.g., `alice/`, `fix/`, `feat/`) + - Create a new branch based on the current branch + - Commit the changes using multiple commits if the changes are unrelated + +3. 
Push the branch and create the PR: + - Push with `-u` flag to set upstream tracking + - Create the PR using `gh pr create` + +4. After the PR is created: + - Run `/changelog <pr_number>` to generate changelog files, then commit and push them + - Run `/pr-description <pr_number>` to update the PR description + +5. Return the PR URL to the user. diff --git a/README.md b/README.md index 058f23128..2221e807e 100644 --- a/README.md +++ b/README.md @@ -57,19 +57,13 @@ Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail) ### 🤖 Claude Code Skills -Use [Pipecat Skills](https://github.com/pipecat-ai/skills) with [Claude Code](https://claude.ai/code) to scaffold projects, generate changelogs, deploy to Pipecat Cloud, and more. Install the marketplace with: +Use [Pipecat Skills](https://github.com/pipecat-ai/skills) with [Claude Code](https://claude.ai/code) to scaffold projects, deploy to Pipecat Cloud, and more. Install the marketplace with: ``` claude plugin marketplace add pipecat-ai/skills ``` -And install the plugins, for example: - -``` -claude plugin install pipecat-dev@pipecat-skills -``` - -there's more! +and install any of the available plugins. ### 📺️ Pipecat TV Channel @@ -179,6 +173,15 @@ You can get started with Pipecat running on your local machine, then move your a > **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors. 
+### Claude Code Skills + +Install development workflow skills for contributing to Pipecat with [Claude Code](https://claude.ai/code): + +``` +claude plugin marketplace add pipecat-ai/pipecat +claude plugin install pipecat-dev@pipecat-dev-skills +``` + ### Running tests To run all tests, from the root directory: From c09ae6ba6d3b27d4f3c90e022726fc97d42893a3 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Wed, 25 Feb 2026 10:17:54 -0300 Subject: [PATCH 096/189] Added two new lifecycle callbacks to AudioContextTTSService: on_audio_context_interrupted() and on_audio_context_completed() --- src/pipecat/services/tts_service.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 1b65521a1..e739a03d2 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -1190,6 +1190,7 @@ class AudioContextTTSService(WebsocketTTSService): async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) await self._stop_audio_context_task() + await self.on_audio_context_interrupted(context_id=self._context_id) self.reset_active_audio_context() self._create_audio_context_task() @@ -1218,6 +1219,7 @@ class AudioContextTTSService(WebsocketTTSService): # We just finished processing the context, so we can safely remove it. del self._contexts[context_id] + await self.on_audio_context_completed(context_id=context_id) self.reset_active_audio_context() # Append some silence between sentences. @@ -1254,6 +1256,35 @@ class AudioContextTTSService(WebsocketTTSService): logger.trace(f"{self} time out on audio context {context_id}") break + async def on_audio_context_interrupted(self, context_id: str): + """Called when an audio context is cancelled due to an interruption. + + Override this in a subclass to perform provider-specific cleanup (e.g. 
+ sending a cancel/close message over the WebSocket) when the bot is + interrupted mid-speech. The audio context task has already been stopped + and the active context has **not** yet been reset when this is called, + so ``context_id`` reflects the context that was cut short. + + Args: + context_id: The ID of the audio context that was interrupted, or + ``None`` if no context was active at the time. + """ + pass + + async def on_audio_context_completed(self, context_id: str): + """Called after an audio context has finished playing all of its audio. + + Override this in a subclass to perform provider-specific cleanup (e.g. + sending a close-context message to free server-side resources) once an + audio context has been fully processed. The context entry has already + been removed from the internal context map, and the active context has + **not** yet been reset when this is called. + + Args: + context_id: The ID of the audio context that finished processing. + """ + pass + class AudioContextWordTTSService(AudioContextTTSService): """Deprecated. Use AudioContextTTSService with supports_word_timestamps=True instead. 
From d899f0af11958f6aa34bf9493589d385e942cbbb Mon Sep 17 00:00:00 2001 From: filipi87 Date: Wed, 25 Feb 2026 10:18:16 -0300 Subject: [PATCH 097/189] Refactored all AudioContextTTSService based providers to override the new callbacks instead of _handle_interruption(), making provider-specific cleanup cleaner and more explicit --- src/pipecat/services/asyncai/tts.py | 26 ++++++++++++++++------- src/pipecat/services/cartesia/tts.py | 18 ++++++++++------ src/pipecat/services/elevenlabs/tts.py | 28 +++++++++++++++++-------- src/pipecat/services/gradium/tts.py | 27 ++++++++++++------------ src/pipecat/services/inworld/tts.py | 29 +++++++++++--------------- src/pipecat/services/resembleai/tts.py | 21 ++++++++++--------- src/pipecat/services/rime/tts.py | 19 +++++++++++++---- 7 files changed, 101 insertions(+), 67 deletions(-) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index d55062c4f..f1f73b7ff 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -10,7 +10,7 @@ import asyncio import base64 import json from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, ClassVar, Dict, Mapping, Optional +from typing import Any, AsyncGenerator, Mapping, Optional import aiohttp from loguru import logger @@ -21,7 +21,6 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, @@ -392,18 +391,29 @@ class AsyncAITTSService(AudioContextTTSService): logger.warning(f"{self} keepalive error: {e}") break - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by closing the current context.""" - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) - # Close the current context when interrupted without closing the websocket + async def _close_context(self, context_id: str): + # Async AI requires 
explicit context closure to free server-side resources, + # both on interruption and on normal completion. if context_id and self._websocket: try: await self._websocket.send( json.dumps({"context_id": context_id, "close_context": True, "transcript": ""}) ) except Exception as e: - logger.error(f"Error closing context on interruption: {e}") + logger.error(f"{self}: Error closing context {context_id}: {e}") + + async def on_audio_context_interrupted(self, context_id: str): + """Close the Async AI context when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Close the Async AI context after all audio has been played. + + Async AI does not send a server-side signal when a context is + exhausted, so Pipecat must explicitly close it with + ``close_context: True`` to free server-side resources. + """ + await self._close_context(context_id) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index f31cc2421..f45e7c54f 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -11,7 +11,7 @@ import json import warnings from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator, ClassVar, Dict, List, Literal, Mapping, Optional +from typing import Any, AsyncGenerator, List, Literal, Mapping, Optional from loguru import logger from pydantic import BaseModel, Field @@ -21,13 +21,11 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextTTSService, TTSService from pipecat.transcriptions.language 
import Language, resolve_language @@ -563,14 +561,22 @@ class CartesiaTTSService(AudioContextTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) + async def on_audio_context_interrupted(self, context_id: str): + """Cancel the active Cartesia context when the bot is interrupted.""" await self.stop_all_metrics() if context_id: cancel_msg = json.dumps({"context_id": context_id, "cancel": True}) await self._get_websocket().send(cancel_msg) + async def on_audio_context_completed(self, context_id: str): + """Called after all audio for a Cartesia context has been played. + + No close message is needed: the server already considers the context + done once it has sent its ``done`` message, which is handled in + ``_process_messages``. + """ + pass + async def flush_audio(self): """Flush any pending audio and finalize the current context.""" context_id = self.get_active_audio_context_id() diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 20d46481d..25e1aa5dd 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -666,14 +666,11 @@ class ElevenLabsTTSService(AudioContextTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by closing the current context.""" - # Close the current context when interrupted without closing the websocket - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) - + async def _close_context(self, context_id: str): + # ElevenLabs requires that Pipecat explicitly closes contexts to free + # server-side resources, both on interruption and on normal 
completion. if context_id and self._websocket: - logger.trace(f"Closing context {context_id} due to interruption") + logger.trace(f"{self}: Closing context {context_id}") try: # ElevenLabs requires that Pipecat manages the contexts and closes them # when they're not longer in use. Since an InterruptionFrame is pushed @@ -686,8 +683,21 @@ class ElevenLabsTTSService(AudioContextTTSService): ) except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) - self._partial_word = "" - self._partial_word_start_time = 0.0 + self._partial_word = "" + self._partial_word_start_time = 0.0 + + async def on_audio_context_interrupted(self, context_id: str): + """Close the ElevenLabs context when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Close the ElevenLabs context after all audio has been played. + + ElevenLabs does not send a server-side signal when a context is + exhausted, so Pipecat must explicitly close it with + ``close_context: True`` to free server-side resources. 
+ """ + await self._close_context(context_id) async def _receive_messages(self): """Handle incoming WebSocket messages from ElevenLabs.""" diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 703289706..ee6e6821e 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -17,13 +17,11 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -265,21 +263,24 @@ class GradiumTTSService(AudioContextTTSService): except Exception as e: logger.error(f"{self} exception: {e}") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by resetting context state. + async def on_audio_context_interrupted(self, context_id: str): + """Called when an audio context is cancelled due to an interruption. - The parent AudioContextTTSService._handle_interruption() cancels the audio context - task and creates a new one. We reset _context_id so the next run_tts() creates a - fresh context. No websocket reconnection needed — audio from the old client_req_id - will be silently dropped since the audio context no longer exists. - - Args: - frame: The interruption frame. - direction: The direction of the frame. + No WebSocket message is needed — audio from the interrupted + ``client_req_id`` will be silently dropped by the base class once the + audio context no longer exists. """ - await super()._handle_interruption(frame, direction) await self.stop_all_metrics() + async def on_audio_context_completed(self, context_id: str): + """Called after an audio context has finished playing all of its audio. 
+ + No close message is needed: Gradium signals completion with an + ``end_of_stream`` message (handled in ``_receive_messages``), after + which the server-side context is already closed. + """ + pass + async def _receive_messages(self): """Process incoming websocket messages, demultiplexing by client_req_id.""" # TODO(laurent): This should not be necessary as it should happen when diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 2f35dc27c..22bdf22ff 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -681,28 +681,23 @@ class InworldTTSService(AudioContextTTSService): return word_times - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle an interruption from the Inworld WebSocket TTS service. - - Args: - frame: The interruption frame. - direction: The direction of the interruption. - """ - old_context_id = self.get_active_audio_context_id() - logger.trace(f"{self}: Handling interruption, old context: {old_context_id}") - - await super()._handle_interruption(frame, direction) - - if old_context_id and self._websocket: - logger.trace(f"{self}: Closing context {old_context_id} due to interruption") + async def _close_context(self, context_id: str): + if context_id and self._websocket: + logger.info(f"{self}: Closing context {context_id} due to interruption or completion") try: - await self._send_close_context(old_context_id) + await self._send_close_context(context_id) except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) - self._cumulative_time = 0.0 self._generation_end_time = 0.0 - logger.trace(f"{self}: Interruption handled, context reset to None") + + async def on_audio_context_interrupted(self, context_id: str): + """Callback invoked when an audio context has been interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: 
str): + """Callback invoked when an audio context has been completed.""" + await self._close_context(context_id) def _get_websocket(self): """Get the websocket for the Inworld WebSocket TTS service. diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index 026d29d3f..c2ac758a7 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -18,13 +18,11 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -247,16 +245,19 @@ class ResembleAITTSService(AudioContextTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by stopping current synthesis. - - Args: - frame: The interruption frame. - direction: The direction of frame processing. - """ - await super()._handle_interruption(frame, direction) + async def on_audio_context_interrupted(self, context_id: str): + """Stop metrics when the bot is interrupted.""" await self.stop_all_metrics() + async def on_audio_context_completed(self, context_id: str): + """Called after the Resemble AI context finishes playing. + + No close message is needed: Resemble AI signals completion with an + ``audio_end`` message (handled in ``_process_messages``), after which + the server-side context is already closed. 
+ """ + pass + async def flush_audio(self): """Flush any pending audio and finalize the current context.""" logger.trace(f"{self}: flushing audio") diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 248c84008..83c2305d5 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -458,14 +458,25 @@ class RimeTTSService(AudioContextTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by clearing current context.""" - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) + async def _close_context(self, context_id: str): + """Clear the Rime speech queue and stop metrics.""" await self.stop_all_metrics() if context_id: await self._get_websocket().send(json.dumps(self._build_clear_msg())) + async def on_audio_context_interrupted(self, context_id: str): + """Clear the Rime speech queue and stop metrics when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Clear server-side state and stop metrics after the Rime context finishes playing. + + Rime does not send a server-side completion signal (e.g. ``done`` / ``end_of_stream`` / + ``audio_end``), so we explicitly send a ``clear`` message to clean up + any residual server-side state once all audio has been delivered. + """ + await self._close_context(context_id) + def _calculate_word_times(self, words: list, starts: list, ends: list) -> list: """Calculate word timing pairs with proper spacing and punctuation. From 751b1b8100e70be0ea2b72e53d9bb7714f35d25b Mon Sep 17 00:00:00 2001 From: filipi87 Date: Wed, 25 Feb 2026 10:18:25 -0300 Subject: [PATCH 098/189] Adding the changelog entries for the tts fixes. 
--- changelog/3814.added.md | 1 + changelog/3814.fixed.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog/3814.added.md create mode 100644 changelog/3814.fixed.md diff --git a/changelog/3814.added.md b/changelog/3814.added.md new file mode 100644 index 000000000..b6b2ebbf8 --- /dev/null +++ b/changelog/3814.added.md @@ -0,0 +1 @@ +- Added `on_audio_context_interrupted()` and `on_audio_context_completed()` callbacks to `AudioContextTTSService`. Subclasses can override these to perform provider-specific cleanup instead of overriding `_handle_interruption()`. diff --git a/changelog/3814.fixed.md b/changelog/3814.fixed.md new file mode 100644 index 000000000..ecd4871f6 --- /dev/null +++ b/changelog/3814.fixed.md @@ -0,0 +1 @@ +- Fixed an issue where `AudioContextTTSService`-based providers (AsyncAI, ElevenLabs, Inworld, Rime) did not close or clean up their server-side audio contexts after normal speech completion, only on interruption. From 73ee4da7d415d741f8318fabd9139548703b1d64 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 10:16:35 -0500 Subject: [PATCH 099/189] Add Krisp API key support for new SDK licensing requirement The Krisp VIVA SDK v1.8.0 requires a license key in globalInit(). Add api_key parameter to KrispVivaSDKManager, KrispVivaTurn, and KrispVivaFilter with fallback to KRISP_VIVA_API_KEY env var. Maintain backwards compatibility with older SDK versions by catching TypeError and falling back to the old 3-arg signature. 
--- changelog/3809.added.md | 1 + changelog/3809.changed.md | 2 +- env.example | 1 + .../07p-interruptible-krisp-viva.py | 9 ++++--- .../audio/filters/krisp_viva_filter.py | 12 +++++++-- src/pipecat/audio/krisp_instance.py | 26 +++++++++++++++++-- src/pipecat/audio/turn/krisp_viva_turn.py | 5 +++- 7 files changed, 47 insertions(+), 9 deletions(-) create mode 100644 changelog/3809.added.md diff --git a/changelog/3809.added.md b/changelog/3809.added.md new file mode 100644 index 000000000..1bc3a9787 --- /dev/null +++ b/changelog/3809.added.md @@ -0,0 +1 @@ +- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.8.0 licensing. Falls back to `KRISP_API_KEY` environment variable. Backwards compatible with older SDK versions. diff --git a/changelog/3809.changed.md b/changelog/3809.changed.md index 43aca00f3..ef1c5c5a1 100644 --- a/changelog/3809.changed.md +++ b/changelog/3809.changed.md @@ -1 +1 @@ -- Added debug logging to `KrispVivaTurn.analyze_end_of_turn()` to log turn state and probability at decision time. +- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.6.1+ licensing. Falls back to `KRISP_VIVA_API_KEY` environment variable. \ No newline at end of file diff --git a/env.example b/env.example index bc14ea0bf..2b850dd19 100644 --- a/env.example +++ b/env.example @@ -104,6 +104,7 @@ INWORLD_API_KEY=... KRISP_MODEL_PATH=... # Krisp Viva +KRISP_VIVA_API_KEY=... KRISP_VIVA_FILTER_MODEL_PATH=... KRISP_VIVA_TURN_MODEL_PATH=... diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index 4da42e201..62f2a1bc1 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ b/examples/foundational/07p-interruptible-krisp-viva.py @@ -54,21 +54,24 @@ load_dotenv(override=True) # We use lambdas to defer transport parameter creation until the transport # type is selected at runtime. 
+ +krisp_viva_filter = KrispVivaFilter() + transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), } diff --git a/src/pipecat/audio/filters/krisp_viva_filter.py b/src/pipecat/audio/filters/krisp_viva_filter.py index ea5bfb8de..1e2f6c81b 100644 --- a/src/pipecat/audio/filters/krisp_viva_filter.py +++ b/src/pipecat/audio/filters/krisp_viva_filter.py @@ -39,7 +39,11 @@ class KrispVivaFilter(BaseAudioFilter): """ def __init__( - self, model_path: str = None, frame_duration: int = 10, noise_suppression_level: int = 100 + self, + model_path: str = None, + frame_duration: int = 10, + noise_suppression_level: int = 100, + api_key: str = "", ) -> None: """Initialize the Krisp noise reduction filter. @@ -48,6 +52,8 @@ class KrispVivaFilter(BaseAudioFilter): If None, uses KRISP_VIVA_FILTER_MODEL_PATH environment variable. frame_duration: Frame duration in milliseconds. noise_suppression_level: Noise suppression level. + api_key: Krisp SDK API key. If empty, falls back to + the KRISP_VIVA_API_KEY environment variable. Raises: ValueError: If model_path is not provided and KRISP_VIVA_FILTER_MODEL_PATH is not set. 
@@ -57,6 +63,8 @@ class KrispVivaFilter(BaseAudioFilter): """ super().__init__() + self._api_key = api_key + try: # Set model path, checking environment if not specified if model_path: @@ -132,7 +140,7 @@ class KrispVivaFilter(BaseAudioFilter): """ try: # Acquire SDK reference (will initialize on first call) - KrispVivaSDKManager.acquire() + KrispVivaSDKManager.acquire(api_key=self._api_key) self._session = self._create_session(sample_rate, self._frame_duration_ms) except Exception as e: logger.error(f"Failed to start Krisp session: {e}", exc_info=True) diff --git a/src/pipecat/audio/krisp_instance.py b/src/pipecat/audio/krisp_instance.py index fae2c691e..5ebfd24cc 100644 --- a/src/pipecat/audio/krisp_instance.py +++ b/src/pipecat/audio/krisp_instance.py @@ -7,6 +7,7 @@ """Krisp Instance manager for pipecat audio.""" import atexit +import os from threading import Lock from loguru import logger @@ -88,17 +89,26 @@ class KrispVivaSDKManager: _lock = Lock() _reference_count = 0 + @staticmethod + def _license_callback(error, error_message): + """Callback for Krisp SDK licensing errors.""" + logger.error(f"Krisp licensing error: {error} - {error_message}") + @staticmethod def _log_callback(log_message, log_level): """Thread-safe callback for Krisp SDK logging.""" logger.info(f"[{log_level}] {log_message}") @classmethod - def acquire(cls): + def acquire(cls, api_key: str = ""): """Acquire a reference to the SDK (initializes if needed). Call this when creating a filter instance. + Args: + api_key: Krisp SDK API key. If empty, falls back to the + KRISP_VIVA_API_KEY environment variable. 
+ Raises: Exception: If SDK initialization fails (propagated from krisp_audio) """ @@ -106,7 +116,19 @@ class KrispVivaSDKManager: # Initialize SDK on first acquire if cls._reference_count == 0: try: - krisp_audio.globalInit("", cls._log_callback, krisp_audio.LogLevel.Off) + key = api_key or os.environ.get("KRISP_VIVA_API_KEY", "") + try: + # New SDK signature (requires license key) + krisp_audio.globalInit( + "", + key, + cls._license_callback, + cls._log_callback, + krisp_audio.LogLevel.Off, + ) + except TypeError: + # Old SDK signature (no license key) + krisp_audio.globalInit("", cls._log_callback, krisp_audio.LogLevel.Off) cls._initialized = True diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py index 59f8aada8..f15c456c2 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -63,6 +63,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): model_path: Optional[str] = None, sample_rate: Optional[int] = None, params: Optional[KrispTurnParams] = None, + api_key: str = "", ) -> None: """Initialize the Krisp turn analyzer. @@ -72,6 +73,8 @@ class KrispVivaTurn(BaseTurnAnalyzer): sample_rate: Optional initial sample rate for audio processing. If provided, this will be used as the fixed sample rate. params: Configuration parameters for turn analysis behavior. + api_key: Krisp SDK API key. If empty, falls back to + the KRISP_VIVA_API_KEY environment variable. Raises: ValueError: If model_path is not provided and KRISP_VIVA_TURN_MODEL_PATH is not set. 
@@ -83,7 +86,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Acquire SDK reference (will initialize on first call) try: - KrispVivaSDKManager.acquire() + KrispVivaSDKManager.acquire(api_key=api_key) self._sdk_acquired = True except Exception as e: self._sdk_acquired = False From 0ca8c850fb1cb6db73b5dd65a0b990f4aa203edd Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 24 Feb 2026 17:42:18 -0500 Subject: [PATCH 100/189] Add TurnMetricsData and e2e processing time for KrispVivaTurn Introduce a generic TurnMetricsData class for turn detection metrics, replacing the service-specific SmartTurnMetricsData (now deprecated). Add end-to-end processing time measurement to KrispVivaTurn, tracking the interval from VAD speech-to-silence transition to model threshold crossing. Consume metrics in the strategy _handle_input_audio path so they are pushed immediately when fresh. --- changelog/3809.added.md | 2 +- changelog/3809.changed.md | 2 +- changelog/3809.deprecated.md | 1 + .../07p-interruptible-krisp-viva.py | 3 ++ examples/foundational/38b-smart-turn-local.py | 13 +++---- src/pipecat/audio/turn/krisp_viva_turn.py | 37 +++++++++++++++---- .../audio/turn/smart_turn/base_smart_turn.py | 13 +------ src/pipecat/metrics/metrics.py | 26 +++++++++---- .../observers/loggers/metrics_log_observer.py | 33 +++++++++++------ .../turn_analyzer_user_turn_stop_strategy.py | 10 +++-- 10 files changed, 92 insertions(+), 48 deletions(-) create mode 100644 changelog/3809.deprecated.md diff --git a/changelog/3809.added.md b/changelog/3809.added.md index 1bc3a9787..99047dc76 100644 --- a/changelog/3809.added.md +++ b/changelog/3809.added.md @@ -1 +1 @@ -- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.8.0 licensing. Falls back to `KRISP_API_KEY` environment variable. Backwards compatible with older SDK versions. +- Added `TurnMetricsData` as a generic metrics class for turn detection, with e2e processing time measurement. 
`KrispVivaTurn` now emits `TurnMetricsData` with `e2e_processing_time_ms` tracking the interval from VAD speech-to-silence transition to turn completion. diff --git a/changelog/3809.changed.md b/changelog/3809.changed.md index ef1c5c5a1..479eaf6ed 100644 --- a/changelog/3809.changed.md +++ b/changelog/3809.changed.md @@ -1 +1 @@ -- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.6.1+ licensing. Falls back to `KRISP_VIVA_API_KEY` environment variable. \ No newline at end of file +- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.6.1+ licensing. Falls back to `KRISP_VIVA_API_KEY` environment variable. diff --git a/changelog/3809.deprecated.md b/changelog/3809.deprecated.md new file mode 100644 index 000000000..f1498ec0b --- /dev/null +++ b/changelog/3809.deprecated.md @@ -0,0 +1 @@ +- Deprecated `SmartTurnMetricsData` in favor of `TurnMetricsData`. `BaseSmartTurn` now emits `TurnMetricsData` directly. 
diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index 62f2a1bc1..24929a825 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ b/examples/foundational/07p-interruptible-krisp-viva.py @@ -31,6 +31,8 @@ from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter from pipecat.audio.turn.krisp_viva_turn import KrispVivaTurn from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.metrics.metrics import TurnMetricsData +from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -124,6 +126,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + observers=[MetricsLogObserver(include_metrics={TurnMetricsData})], ) @transport.event_handler("on_client_connected") diff --git a/examples/foundational/38b-smart-turn-local.py b/examples/foundational/38b-smart-turn-local.py index 2872a0e76..dc62010fb 100644 --- a/examples/foundational/38b-smart-turn-local.py +++ b/examples/foundational/38b-smart-turn-local.py @@ -12,6 +12,8 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.metrics.metrics import TurnMetricsData +from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -77,7 +79,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): pipeline = Pipeline( [ transport.input(), # Transport user input - rtvi, stt, user_aggregator, # User 
responses llm, # LLM @@ -94,17 +95,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + observers=[MetricsLogObserver(include_metrics={TurnMetricsData})], ) - @task.rtvi.event_handler("on_client_ready") - async def on_client_ready(rtvi): - # Kick off the conversation - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([LLMRunFrame()]) - @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + # Kick off the conversation + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py index f15c456c2..3aa540491 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -15,6 +15,7 @@ passed directly to the constructor. 
""" import os +import time from typing import Optional, Tuple import numpy as np @@ -26,7 +27,7 @@ from pipecat.audio.krisp_instance import ( int_to_krisp_sample_rate, ) from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, BaseTurnParams, EndOfTurnState -from pipecat.metrics.metrics import MetricsData +from pipecat.metrics.metrics import MetricsData, TurnMetricsData try: import krisp_audio @@ -118,6 +119,9 @@ class KrispVivaTurn(BaseTurnAnalyzer): self._last_probability = None self._frame_probabilities = [] self._last_state = EndOfTurnState.INCOMPLETE + self._speech_stopped_time: Optional[float] = None + self._e2e_processing_time_ms: Optional[float] = None + self._last_metrics: Optional[TurnMetricsData] = None # Create session with provided sample rate or default to 16000 Hz # This preloads the model to improve latency when set_sample_rate is called later @@ -291,7 +295,14 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Track speech start time if not self._speech_triggered: logger.trace("Speech detected, turn analysis started") + self._e2e_processing_time_ms = None self._speech_triggered = True + # Reset speech stopped time when speech resumes + self._speech_stopped_time = None + else: + # Record the moment speech transitions to non-speech + if self._speech_triggered and self._speech_stopped_time is None: + self._speech_stopped_time = time.perf_counter() # Note: We don't immediately mark as complete on silence detection. # Instead, we wait for the model's probability check below to confirm # end-of-turn based on the threshold. 
@@ -311,6 +322,18 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Only mark as complete if we've detected speech and the model # confirms with sufficient confidence if self._speech_triggered and prob >= self._params.threshold: + # Calculate e2e processing time: time from speech stop to threshold crossing + if self._speech_stopped_time is not None: + self._e2e_processing_time_ms = ( + time.perf_counter() - self._speech_stopped_time + ) * 1000 + self._last_metrics = TurnMetricsData( + processor="KrispVivaTurn", + is_complete=True, + probability=prob, + e2e_processing_time_ms=self._e2e_processing_time_ms, + ) + logger.debug(f"Krisp turn complete") state = EndOfTurnState.COMPLETE self.clear() break @@ -332,15 +355,15 @@ class KrispVivaTurn(BaseTurnAnalyzer): Tuple containing the end-of-turn state and optional metrics data. Returns the last state determined by append_audio(). """ - # For real-time processing, the state is determined in append_audio - # Return the last state that was computed - logger.debug( - f"Krisp turn analysis: state={self._last_state}, probability={self._last_probability}" - ) - return self._last_state, None + # For real-time processing, the state is determined in append_audio. + # Consume metrics so they aren't pushed twice. 
+ metrics = self._last_metrics + self._last_metrics = None + return self._last_state, metrics def clear(self): """Reset the turn analyzer to its initial state.""" self._speech_triggered = False self._audio_buffer.clear() self._last_state = EndOfTurnState.INCOMPLETE + self._speech_stopped_time = None diff --git a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py index 66b45a8f6..fa652d884 100644 --- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py +++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py @@ -21,7 +21,7 @@ import numpy as np from loguru import logger from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, BaseTurnParams, EndOfTurnState -from pipecat.metrics.metrics import MetricsData, SmartTurnMetricsData +from pipecat.metrics.metrics import MetricsData, TurnMetricsData # Default timing parameters STOP_SECS = 3 @@ -222,18 +222,11 @@ class BaseSmartTurn(BaseTurnAnalyzer): # Calculate processing time e2e_processing_time_ms = (end_time - start_time) * 1000 - # Extract metrics from the nested structure - metrics = result.get("metrics", {}) - inference_time = metrics.get("inference_time", 0) - total_time = metrics.get("total_time", 0) - # Prepare the result data - result_data = SmartTurnMetricsData( + result_data = TurnMetricsData( processor="BaseSmartTurn", is_complete=result["prediction"] == 1, probability=result["probability"], - inference_time_ms=inference_time * 1000, - server_total_time_ms=total_time * 1000, e2e_processing_time_ms=e2e_processing_time_ms, ) @@ -241,8 +234,6 @@ class BaseSmartTurn(BaseTurnAnalyzer): f"Prediction: {'Complete' if result_data.is_complete else 'Incomplete'}" ) logger.trace(f"Probability of complete: {result_data.probability:.4f}") - logger.trace(f"Inference time: {result_data.inference_time_ms:.2f}ms") - logger.trace(f"Server total time: {result_data.server_total_time_ms:.2f}ms") logger.trace(f"E2E processing time: 
{result_data.e2e_processing_time_ms:.2f}ms") except SmartTurnTimeoutException: logger.debug( diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index 98903483a..ccf30227a 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -87,19 +87,31 @@ class TTSUsageMetricsData(MetricsData): value: int -class SmartTurnMetricsData(MetricsData): - """Metrics data for smart turn predictions. +class TurnMetricsData(MetricsData): + """Metrics data for turn detection predictions. Parameters: is_complete: Whether the turn is predicted to be complete. probability: Confidence probability of the turn completion prediction. - inference_time_ms: Time taken for inference in milliseconds. - server_total_time_ms: Total server processing time in milliseconds. - e2e_processing_time_ms: End-to-end processing time in milliseconds. + e2e_processing_time_ms: End-to-end processing time in milliseconds, + measured from VAD speech-to-silence transition to turn completion. """ is_complete: bool probability: float - inference_time_ms: float - server_total_time_ms: float e2e_processing_time_ms: float + + +class SmartTurnMetricsData(TurnMetricsData): + """Metrics data for smart turn predictions. + + .. deprecated:: 0.0.104 + Use :class:`TurnMetricsData` instead. This class will be removed in a future version. + + Parameters: + inference_time_ms: Time taken for inference in milliseconds. + server_total_time_ms: Total server processing time in milliseconds. 
+ """ + + inference_time_ms: float = 0.0 + server_total_time_ms: float = 0.0 diff --git a/src/pipecat/observers/loggers/metrics_log_observer.py b/src/pipecat/observers/loggers/metrics_log_observer.py index a36ab510e..7f4c1635c 100644 --- a/src/pipecat/observers/loggers/metrics_log_observer.py +++ b/src/pipecat/observers/loggers/metrics_log_observer.py @@ -24,6 +24,7 @@ from pipecat.metrics.metrics import ( SmartTurnMetricsData, TTFBMetricsData, TTSUsageMetricsData, + TurnMetricsData, ) from pipecat.observers.base_observer import BaseObserver, FramePushed @@ -37,7 +38,7 @@ class MetricsLogObserver(BaseObserver): - ProcessingMetricsData (General processing time) - LLMUsageMetricsData (Token usage statistics) - TTSUsageMetricsData (Text-to-Speech character counts) - - SmartTurnMetricsData (Turn prediction metrics) + - TurnMetricsData (Turn prediction metrics) This allows developers to track performance metrics, token usage, and other statistics throughout the pipeline. @@ -70,6 +71,17 @@ class MetricsLogObserver(BaseObserver): **kwargs: Additional arguments passed to parent class. 
""" super().__init__(**kwargs) + # Normalize deprecated types in include_metrics + if include_metrics and SmartTurnMetricsData in include_metrics: + import warnings + + warnings.warn( + "SmartTurnMetricsData is deprecated in include_metrics, " + "use TurnMetricsData instead.", + DeprecationWarning, + stacklevel=2, + ) + include_metrics = (include_metrics - {SmartTurnMetricsData}) | {TurnMetricsData} self._include_metrics = include_metrics self._frames_seen = set() @@ -144,8 +156,8 @@ class MetricsLogObserver(BaseObserver): logger.debug( f"📊 {processor_info} TTS USAGE{model_info}: {metrics_data.value} characters at {time_sec:.3f}s" ) - elif isinstance(metrics_data, SmartTurnMetricsData): - self._log_smart_turn(metrics_data, processor_info, model_info, time_sec) + elif isinstance(metrics_data, TurnMetricsData): + self._log_turn(metrics_data, processor_info, model_info, time_sec) else: # Generic fallback for unknown metrics types logger.debug( @@ -191,28 +203,27 @@ class MetricsLogObserver(BaseObserver): f"📊 {processor_info} LLM TOKEN USAGE{model_info}: {usage_str} at {time_sec:.2f}s" ) - def _log_smart_turn( + def _log_turn( self, - metrics_data: SmartTurnMetricsData, + metrics_data: TurnMetricsData, processor_info: str, model_info: str, time_sec: float, ): - """Log smart turn prediction metrics. + """Log turn prediction metrics. Args: - metrics_data: The smart turn metrics data. + metrics_data: The turn metrics data. processor_info: Formatted processor name string. model_info: Formatted model name string. time_sec: Timestamp in seconds. 
""" complete_str = "COMPLETE" if metrics_data.is_complete else "INCOMPLETE" + e2e_str = f"{metrics_data.e2e_processing_time_ms:.1f}ms" logger.debug( - f"📊 {processor_info} SMART TURN{model_info}: {complete_str} " + f"📊 {processor_info} TURN{model_info}: {complete_str} " f"(probability: {metrics_data.probability:.2%}, " - f"inference: {metrics_data.inference_time_ms:.1f}ms, " - f"server: {metrics_data.server_total_time_ms:.1f}ms, " - f"e2e: {metrics_data.e2e_processing_time_ms:.1f}ms) " + f"e2e: {e2e_str}) " f"at {time_sec:.2f}s" ) diff --git a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py index acd4936a3..f141a75b7 100644 --- a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py @@ -115,10 +115,14 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy): """Handle input audio to check if the turn is completed.""" state = self._turn_analyzer.append_audio(frame.audio, self._vad_user_speaking) - # If at this point the model says the turn is complete it will be due to - # a timeout, so we mark turn as complete and we trigger the user end of - # turn. + # Streaming analyzers (e.g. KrispVivaTurn) detect turn completion + # frame-by-frame inside append_audio, so COMPLETE is returned here + # rather than in analyze_end_of_turn. Batch analyzers (BaseSmartTurn) + # return COMPLETE here only on a silence timeout. In either case we + # consume and push metrics immediately while they're fresh. 
if state == EndOfTurnState.COMPLETE: + _, prediction = await self._turn_analyzer.analyze_end_of_turn() + await self._handle_prediction_result(prediction) self._turn_complete = True await self._maybe_trigger_user_turn_stopped() From 81f4672535cd683e233d7bd9a3698172c1d419e0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 25 Feb 2026 09:47:42 -0500 Subject: [PATCH 101/189] Add Performance as a changelog fragment option --- CLAUDE.md | 27 +++++++++++++-------------- CONTRIBUTING.md | 4 +--- pyproject.toml | 5 +++++ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6886fc1ed..7727975b3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -25,7 +25,7 @@ uv run pytest tests/test_name.py uv run pytest tests/test_name.py::test_function_name # Preview changelog -towncrier build --draft --version Unreleased +uv run towncrier build --draft --version Unreleased # Lint and format check uv run ruff check @@ -74,7 +74,7 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**: - **Context Aggregation**: `LLMContext` accumulates messages for LLM calls; `UserResponse` aggregates user input - **Turn Management**: Turn management is done through `LLMUserAggregator` and -`LLMAssistantAggregator`, created with `LLMContextAggregatorPair` + `LLMAssistantAggregator`, created with `LLMContextAggregatorPair` - **User turn strategies**: Detection of when the user starts and stops speaking is done via user turn start/stop strategies. They push `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` respectively. 
@@ -90,17 +90,17 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**: ### Key Directories -| Directory | Purpose | -|---------------------------|----------------------------------------------------| -| `src/pipecat/frames/` | Frame definitions (100+ types) | -| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio | -| `src/pipecat/pipeline/` | Pipeline orchestration | -| `src/pipecat/services/` | AI service integrations (60+ providers) | -| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) | -| `src/pipecat/serializers/`| Frame serialization for WebSocket protocols | -| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow | -| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF | -| `src/pipecat/turns/` | User turn management | +| Directory | Purpose | +| -------------------------- | -------------------------------------------------- | +| `src/pipecat/frames/` | Frame definitions (100+ types) | +| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio | +| `src/pipecat/pipeline/` | Pipeline orchestration | +| `src/pipecat/services/` | AI service integrations (60+ providers) | +| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) | +| `src/pipecat/serializers/` | Frame serialization for WebSocket protocols | +| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow | +| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF | +| `src/pipecat/turns/` | User turn management | ## Code Style @@ -155,4 +155,3 @@ When adding a new service: ## Testing Test utilities live in `src/pipecat/tests/utils.py`. Use `run_test()` to send frames through a pipeline and assert expected output frames in each direction. Use `SleepFrame(sleep=N)` to add delays between frames. 
- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 937532ec9..936a652fa 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,12 +49,12 @@ Every pull request that makes a user-facing change should include a changelog en ``` 2. Choose the appropriate type: - - `added.md` - New features - `changed.md` - Changes in existing functionality - `deprecated.md` - Soon-to-be removed features - `removed.md` - Removed features - `fixed.md` - Bug fixes + - `performance.md` - Performance improvements - `security.md` - Security fixes - `other.md` - Other changes (documentation, dependencies, etc.) @@ -80,7 +80,6 @@ Every pull request that makes a user-facing change should include a changelog en ```markdown - Updated service configuration: - - Changed default timeout to 30 seconds - Added retry logic for failed connections ``` @@ -105,7 +104,6 @@ changelog/1234.changed.2.md ```markdown - Updated service configuration: - - Changed default timeout to 30 seconds - Added retry logic for failed connections ``` diff --git a/pyproject.toml b/pyproject.toml index a45ebb3b3..d988fa5cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -257,6 +257,11 @@ directory = "fixed" name = "Fixed" showcontent = true +[[tool.towncrier.type]] +directory = "performance" +name = "Performance" +showcontent = true + [[tool.towncrier.type]] directory = "security" name = "Security" From e028194dbe3895839383ac7c840c4279850fd798 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 25 Feb 2026 12:20:21 -0500 Subject: [PATCH 102/189] Update the pipecat-ai-small-webrtc-prebuilt to 2.3.0 --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a45ebb3b3..cc12deed6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,7 @@ remote-smart-turn = [] resembleai = [ "pipecat-ai[websockets-base]" ] rime = [ "pipecat-ai[websockets-base]" ] riva = [ "pipecat-ai[nvidia]" ] -runner = [ "python-dotenv>=1.0.0,<2.0.0", 
"uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.128.0", "pipecat-ai-small-webrtc-prebuilt>=2.2.0"] +runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.128.0", "pipecat-ai-small-webrtc-prebuilt>=2.3.0"] sagemaker = ["aws_sdk_sagemaker_runtime_http2; python_version>='3.12'"] sambanova = [] sarvam = [ "sarvamai==0.1.26a2", "pipecat-ai[websockets-base]" ] diff --git a/uv.lock b/uv.lock index bd2f64639..a4450710d 100644 --- a/uv.lock +++ b/uv.lock @@ -4730,7 +4730,7 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'ultravox'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" }, { name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" }, - { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=2.2.0" }, + { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=2.3.0" }, { name = "piper-tts", marker = "extra == 'piper'", specifier = ">=1.3.0,<2" }, { name = "protobuf", specifier = "~=5.29.6" }, { name = "pvkoala", marker = "extra == 'koala'", specifier = "~=2.0.3" }, @@ -4801,14 +4801,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/1d/37/0f1d11d1dc33234a3 [[package]] name = "pipecat-ai-small-webrtc-prebuilt" -version = "2.2.0" +version = "2.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastapi", extra = ["all"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/9f/b06cc0e2eaeda811959c216dade3ed38c30d20e6327a2b22f80125072c5a/pipecat_ai_small_webrtc_prebuilt-2.2.0.tar.gz", hash = "sha256:5d73fe619225b97e383863a901060d1c986f088f4de004477856b085aaba76c4", size = 466005, upload-time = "2026-02-13T19:28:54.626Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/5f/b0f73bbc6997c22655f0495ce21a4cb176e192df1b5407f66fad8101c697/pipecat_ai_small_webrtc_prebuilt-2.3.0.tar.gz", hash = 
"sha256:10dc31db9978d68001ae941066fe460c533412a8984df71e5416d4ebeb9c0371", size = 469001, upload-time = "2026-02-25T17:18:43.316Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/71/20a015cea25dc57129ed6426fdf37a09aefe37f4dd60e3a42ba2d9e3bd1b/pipecat_ai_small_webrtc_prebuilt-2.2.0-py3-none-any.whl", hash = "sha256:e7917d23f51e5418667541a3e241b2de28a43eea35a5a9486721be3da04e719d", size = 466257, upload-time = "2026-02-13T19:28:53.188Z" }, + { url = "https://files.pythonhosted.org/packages/5a/bc/6193b639a53f4bac1c0fe29b1f8e0d49085c60e457b02a01e725eb7c093f/pipecat_ai_small_webrtc_prebuilt-2.3.0-py3-none-any.whl", hash = "sha256:b3ddaff8bbd56746fe3c58a2d721d3ccc94d17a33c16d78dcbce73d7526c1a05", size = 468881, upload-time = "2026-02-25T17:18:41.869Z" }, ] [[package]] From ceead60ef23cdf30a98f41fd1fa09ed1f28b81ce Mon Sep 17 00:00:00 2001 From: Stephen Altamirano Date: Wed, 25 Feb 2026 09:43:57 -0800 Subject: [PATCH 103/189] Add `append_trailing_space` to all Rime websocket services This was added in 31daa889e83b960fab79d66b2ab014d930e15a2e, but only to `RimeTTSService`, not to `RimeNonJsonTTSService. Bringing these to parity means that users switching between the two, with the same inputs, have more consistent vocalization behaviors. 
--- changelog/3837.fixed.md | 1 + src/pipecat/services/rime/tts.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog/3837.fixed.md diff --git a/changelog/3837.fixed.md b/changelog/3837.fixed.md new file mode 100644 index 000000000..767e79f45 --- /dev/null +++ b/changelog/3837.fixed.md @@ -0,0 +1 @@ +- Fixed issues with `RimeNonJsonTTSService` where trailing punctuation is sometimes vocalized diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 83c2305d5..484c99857 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -846,6 +846,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): aggregate_sentences=aggregate_sentences, push_stop_frames=True, pause_frame_processing=True, + append_trailing_space=True, **kwargs, ) params = params or RimeNonJsonTTSService.InputParams() From 44993fe9e3c55c76c063ce9ccba7b539ae0a2576 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 25 Feb 2026 14:09:17 -0500 Subject: [PATCH 104/189] Remove PlayHT TTS services --- README.md | 26 +- changelog/3838.removed.md | 1 + docs/api/README.md | 3 +- env.example | 4 - .../07e-interruptible-playht-http.py | 125 ---- .../foundational/07e-interruptible-playht.py | 127 ---- .../55t-update-settings-playht-tts.py | 126 ---- pyproject.toml | 1 - src/pipecat/services/playht/__init__.py | 13 - src/pipecat/services/playht/tts.py | 699 ------------------ uv.lock | 6 +- 11 files changed, 16 insertions(+), 1115 deletions(-) create mode 100644 changelog/3838.removed.md delete mode 100644 examples/foundational/07e-interruptible-playht-http.py delete mode 100644 examples/foundational/07e-interruptible-playht.py delete mode 100644 examples/foundational/55t-update-settings-playht-tts.py delete mode 100644 src/pipecat/services/playht/__init__.py delete mode 100644 src/pipecat/services/playht/tts.py diff --git a/README.md b/README.md index 2221e807e..05874be81 100644 --- a/README.md +++ b/README.md @@ -81,19 +81,19 @@ 
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout ## 🧩 Available services -| Category | Services | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), 
[Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | -| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together 
AI](https://docs.pipecat.ai/server/services/llm/together) | -| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | -| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI 
Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), | -| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | -| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) | -| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | -| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | -| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | -| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) | -| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | +| Category | Services | +| ------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal 
Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | +| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together 
AI](https://docs.pipecat.ai/server/services/llm/together) | +| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | +| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), 
[Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), | +| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | +| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) | +| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | +| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | +| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | +| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) | +| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | 📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services) diff --git a/changelog/3838.removed.md b/changelog/3838.removed.md new file mode 100644 
index 000000000..fa811cb71 --- /dev/null +++ b/changelog/3838.removed.md @@ -0,0 +1 @@ +- ⚠️ Removed `PlayHTTTSService` and `PlayHTHttpTTSService`. PlayHT has been shut down and is no longer available. diff --git a/docs/api/README.md b/docs/api/README.md index 22b62d45e..e181bc898 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -42,7 +42,7 @@ This script: - Creates a fresh virtual environment - Installs all dependencies as specified in requirements files -- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT) +- Handles conflicting dependencies (like grpcio versions for Riva) - Builds the documentation in an isolated environment - Provides detailed logging of the build process @@ -74,7 +74,6 @@ start _build/html/index.html ├── index.rst # Main documentation entry point ├── requirements-base.txt # Base documentation dependencies ├── requirements-riva.txt # Riva-specific dependencies -├── requirements-playht.txt # PlayHT-specific dependencies ├── build-docs.sh # Local build script └── rtd-test.py # ReadTheDocs test build script ``` diff --git a/env.example b/env.example index 2b850dd19..82308812e 100644 --- a/env.example +++ b/env.example @@ -147,10 +147,6 @@ KOALA_ACCESS_KEY=... # Piper PIPER_BASE_URL=... -# PlayHT -PLAYHT_USER_ID=... -PLAYHT_API_KEY=... - # Plivo PLIVO_AUTH_ID=... PLIVO_AUTH_TOKEN=... 
diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/07e-interruptible-playht-http.py deleted file mode 100644 index c56de3b9f..000000000 --- a/examples/foundational/07e-interruptible-playht-http.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import ( - LLMContextAggregatorPair, - LLMUserAggregatorParams, -) -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.playht.tts import PlayHTHttpTTSService -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. 
-transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - tts = PlayHTHttpTTSService( - user_id=os.getenv("PLAYHT_USER_ID"), - api_key=os.getenv("PLAYHT_API_KEY"), - voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", - ) - - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", - }, - ] - - context = LLMContext(messages) - user_aggregator, assistant_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), - ) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, - user_aggregator, # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - assistant_aggregator, # Assistant spoken responses - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. 
- messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/07e-interruptible-playht.py b/examples/foundational/07e-interruptible-playht.py deleted file mode 100644 index b42f8f6a2..000000000 --- a/examples/foundational/07e-interruptible-playht.py +++ /dev/null @@ -1,127 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import ( - LLMContextAggregatorPair, - LLMUserAggregatorParams, -) -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.playht.tts import PlayHTTTSService -from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import BaseTransport, TransportParams 
-from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - tts = PlayHTTTSService( - user_id=os.getenv("PLAYHT_USER_ID"), - api_key=os.getenv("PLAYHT_API_KEY"), - voice_url="s3://voice-cloning-zero-shot/e46b4027-b38d-4d24-b292-38fbca2be0ef/original/manifest.json", - params=PlayHTTTSService.InputParams(language=Language.EN), - ) - - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", - }, - ] - - context = LLMContext(messages) - user_aggregator, assistant_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), - ) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, - user_aggregator, # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - assistant_aggregator, # Assistant spoken responses - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/55t-update-settings-playht-tts.py b/examples/foundational/55t-update-settings-playht-tts.py deleted file mode 100644 index d79120d99..000000000 --- a/examples/foundational/55t-update-settings-playht-tts.py +++ /dev/null @@ -1,126 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import asyncio -import os - -from dotenv import load_dotenv -from 
loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import ( - LLMContextAggregatorPair, - LLMUserAggregatorParams, -) -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.playht.tts import PlayHTTTSService, PlayHTTTSSettings -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - tts = PlayHTTTSService( - api_key=os.getenv("PLAYHT_API_KEY"), - user_id=os.getenv("PLAYHT_USER_ID"), - voice_url=os.getenv("PLAYHT_VOICE_URL", ""), - ) - - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", - }, - ] - - context = LLMContext(messages) - user_aggregator, assistant_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), - ) - - pipeline = Pipeline( - [ - transport.input(), - stt, - user_aggregator, - llm, - tts, - transport.output(), - assistant_aggregator, - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([LLMRunFrame()]) - - await asyncio.sleep(10) - logger.info("Updating PlayHT TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(delta=PlayHTTTSSettings(speed=1.3))) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/pyproject.toml b/pyproject.toml index a45ebb3b3..a925e70d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,6 @@ openpipe = [ "openpipe>=4.50.0,<6" ] openrouter = [] perplexity = [] piper = [ "piper-tts>=1.3.0,<2", "requests>=2.32.5,<3" ] -playht = 
[ "pipecat-ai[websockets-base]" ] qwen = [] remote-smart-turn = [] resembleai = [ "pipecat-ai[websockets-base]" ] diff --git a/src/pipecat/services/playht/__init__.py b/src/pipecat/services/playht/__init__.py deleted file mode 100644 index 500ea0fdc..000000000 --- a/src/pipecat/services/playht/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import sys - -from pipecat.services import DeprecatedModuleProxy - -from .tts import * - -sys.modules[__name__] = DeprecatedModuleProxy(globals(), "playht", "playht.tts") diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py deleted file mode 100644 index 08a87209c..000000000 --- a/src/pipecat/services/playht/tts.py +++ /dev/null @@ -1,699 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""PlayHT text-to-speech service implementations. - -This module provides integration with PlayHT's text-to-speech API -supporting both WebSocket streaming and HTTP-based synthesis. 
-""" - -import io -import json -import struct -import uuid -import warnings -from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional - -import aiohttp -from loguru import logger -from pydantic import BaseModel - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - ErrorFrame, - Frame, - InterruptionFrame, - StartFrame, - TTSAudioRawFrame, - TTSStartedFrame, - TTSStoppedFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import InterruptibleTTSService, TTSService -from pipecat.transcriptions.language import Language, resolve_language -from pipecat.utils.tracing.service_decorators import traced_tts - -try: - from websockets.asyncio.client import connect as websocket_connect - from websockets.protocol import State -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.") - raise Exception(f"Missing module: {e}") - - -def language_to_playht_language(language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT language code. - - Args: - language: The Language enum value to convert. - - Returns: - The corresponding PlayHT language code, or None if not supported. 
- """ - LANGUAGE_MAP = { - Language.AF: "afrikans", - Language.AM: "amharic", - Language.AR: "arabic", - Language.BN: "bengali", - Language.BG: "bulgarian", - Language.CA: "catalan", - Language.CS: "czech", - Language.DA: "danish", - Language.DE: "german", - Language.EL: "greek", - Language.EN: "english", - Language.ES: "spanish", - Language.FR: "french", - Language.GL: "galician", - Language.HE: "hebrew", - Language.HI: "hindi", - Language.HR: "croatian", - Language.HU: "hungarian", - Language.ID: "indonesian", - Language.IT: "italian", - Language.JA: "japanese", - Language.KO: "korean", - Language.MS: "malay", - Language.NL: "dutch", - Language.PL: "polish", - Language.PT: "portuguese", - Language.RU: "russian", - Language.SQ: "albanian", - Language.SR: "serbian", - Language.SV: "swedish", - Language.TH: "thai", - Language.TL: "tagalog", - Language.TR: "turkish", - Language.UK: "ukrainian", - Language.UR: "urdu", - Language.XH: "xhosa", - Language.ZH: "mandarin", - } - - return resolve_language(language, LANGUAGE_MAP, use_base_code=False) - - -@dataclass -class PlayHTTTSSettings(TTSSettings): - """Settings for PlayHT TTS services. - - Parameters: - output_format: Audio output format. - voice_engine: Voice engine to use. - speed: Speech speed multiplier. Defaults to 1.0. - seed: Random seed for voice consistency. - playht_sample_rate: Audio sample rate sent to the API. - """ - - output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - voice_engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - playht_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - - -class PlayHTTTSService(InterruptibleTTSService): - """PlayHT WebSocket-based text-to-speech service. - - .. deprecated:: 0.0.88 - - This class is deprecated and will be removed in a future version. 
- PlayHT is shutting down their API on December 31st, 2025. - - Provides real-time text-to-speech synthesis using PlayHT's WebSocket API. - Supports streaming audio generation with configurable voice engines and - language settings. - """ - - _settings: PlayHTTTSSettings - - class InputParams(BaseModel): - """Input parameters for PlayHT TTS configuration. - - Parameters: - language: Language for synthesis. Defaults to English. - speed: Speech speed multiplier. Defaults to 1.0. - seed: Random seed for voice consistency. - """ - - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 - seed: Optional[int] = None - - def __init__( - self, - *, - api_key: str, - user_id: str, - voice_url: str, - voice_engine: str = "Play3.0-mini", - sample_rate: Optional[int] = None, - output_format: str = "wav", - params: Optional[InputParams] = None, - **kwargs, - ): - """Initialize the PlayHT WebSocket TTS service. - - Args: - api_key: PlayHT API key for authentication. - user_id: PlayHT user ID for authentication. - voice_url: URL of the voice to use for synthesis. - voice_engine: Voice engine to use. Defaults to "Play3.0-mini". - sample_rate: Audio sample rate. If None, uses default. - output_format: Audio output format. Defaults to "wav". - params: Additional input parameters for voice customization. - **kwargs: Additional arguments passed to parent InterruptibleTTSService. - """ - super().__init__( - pause_frame_processing=True, - sample_rate=sample_rate, - **kwargs, - ) - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "PlayHT is shutting down their API on December 31st, 2025. 
" - "'PlayHTTTSService' is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - params = params or PlayHTTTSService.InputParams() - - self._api_key = api_key - self._user_id = user_id - self._websocket_url = None - self._receive_task = None - self._context_id = None - - self._settings = PlayHTTTSSettings( - model=voice_engine, - voice=voice_url, - language=self.language_to_service_language(params.language) - if params.language - else "english", - output_format=output_format, - voice_engine=voice_engine, - speed=params.speed, - seed=params.seed, - playht_sample_rate=0, - ) - self._sync_model_name_to_metrics() - - def can_generate_metrics(self) -> bool: - """Check if this service can generate processing metrics. - - Returns: - True, as PlayHT service supports metrics generation. - """ - return True - - async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: - """Apply a settings delta. - - Settings are stored but not applied to the active connection. - """ - changed = await super()._update_settings(delta) - - if not changed: - return changed - - # TODO: someday we could reconnect here to apply updated settings. - # Code might look something like the below: - # await self._disconnect() - # await self._connect() - - self._warn_unhandled_updated_settings(changed) - - return changed - - def language_to_service_language(self, language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT service language format. - - Args: - language: The language to convert. - - Returns: - The PlayHT-specific language code, or None if not supported. - """ - return language_to_playht_language(language) - - async def start(self, frame: StartFrame): - """Start the PlayHT TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - await self._connect() - - async def stop(self, frame: EndFrame): - """Stop the PlayHT TTS service. 
- - Args: - frame: The end frame. - """ - await super().stop(frame) - await self._disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the PlayHT TTS service. - - Args: - frame: The cancel frame. - """ - await super().cancel(frame) - await self._disconnect() - - async def _connect(self): - """Connect to PlayHT WebSocket and start receive task.""" - await super()._connect() - - await self._connect_websocket() - - if self._websocket and not self._receive_task: - self._receive_task = self.create_task(self._receive_task_handler(self._report_error)) - - async def _disconnect(self): - """Disconnect from PlayHT WebSocket and clean up tasks.""" - await super()._disconnect() - - if self._receive_task: - await self.cancel_task(self._receive_task) - self._receive_task = None - - await self._disconnect_websocket() - - async def _connect_websocket(self): - """Connect to PlayHT websocket.""" - try: - if self._websocket and self._websocket.state is State.OPEN: - return - - logger.debug("Connecting to PlayHT") - - if not self._websocket_url: - await self._get_websocket_url() - - if not isinstance(self._websocket_url, str): - raise ValueError("WebSocket URL is not a string") - - self._websocket = await websocket_connect(self._websocket_url) - - await self._call_event_handler("on_connected") - except ValueError as e: - logger.error(f"{self} initialization error: {e}") - self._websocket = None - await self._call_event_handler("on_connection_error", f"{e}") - except Exception as e: - await self.push_error(error_msg=f"Error connecting: {e}", exception=e) - self._websocket = None - await self._call_event_handler("on_connection_error", f"{e}") - - async def _disconnect_websocket(self): - """Disconnect from PlayHT websocket.""" - try: - await self.stop_all_metrics() - - if self._websocket: - logger.debug("Disconnecting from PlayHT") - await self._websocket.close() - except Exception as e: - await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e) - 
finally: - self._context_id = None - self._websocket = None - await self._call_event_handler("on_disconnected") - - async def _get_websocket_url(self): - """Retrieve WebSocket URL from PlayHT API.""" - async with aiohttp.ClientSession() as session: - async with session.post( - "https://api.play.ht/api/v4/websocket-auth", - headers={ - "Authorization": f"Bearer {self._api_key}", - "X-User-Id": self._user_id, - "Content-Type": "application/json", - }, - ) as response: - if response.status in (200, 201): - data = await response.json() - # Handle the new response format with multiple URLs - if "websocket_urls" in data: - # Select URL based on voice_engine - if self._settings.voice_engine in data["websocket_urls"]: - self._websocket_url = data["websocket_urls"][ - self._settings.voice_engine - ] - else: - raise ValueError( - f"Unsupported voice engine: {self._settings.voice_engine}" - ) - else: - raise ValueError("Invalid response: missing websocket_urls") - else: - raise Exception(f"Failed to get WebSocket URL: {response.status}") - - def _get_websocket(self): - """Get the WebSocket connection if available.""" - if self._websocket: - return self._websocket - raise Exception("Websocket not connected") - - def create_context_id(self) -> str: - """Generate a unique context ID for a TTS request in case we don't have one already in progress. - - Returns: - A unique string identifier for the TTS context. - """ - # If a context ID does not exist, create a new one. - # If an ID exists, continue using the current ID. - # When interruptions happen, user speech results in - # an interruption, which resets the context ID. 
- if not self._context_id: - return str(uuid.uuid4()) - return self._context_id - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by stopping metrics and clearing request ID.""" - await super()._handle_interruption(frame, direction) - await self.stop_all_metrics() - self._context_id = None - - async def _receive_messages(self): - """Receive messages from PlayHT websocket.""" - async for message in self._get_websocket(): - if isinstance(message, bytes): - # Skip the WAV header message - if message.startswith(b"RIFF"): - continue - await self.stop_ttfb_metrics() - frame = TTSAudioRawFrame(message, self.sample_rate, 1, context_id=self._context_id) - await self.push_frame(frame) - else: - logger.debug(f"Received text message: {message}") - try: - msg = json.loads(message) - if msg.get("type") == "start": - # Handle start of stream - logger.debug(f"Started processing request: {msg.get('request_id')}") - elif msg.get("type") == "end": - # Handle end of stream - if "request_id" in msg and msg["request_id"] == self._context_id: - await self.push_frame(TTSStoppedFrame(context_id=self._context_id)) - self._context_id = None - elif "error" in msg: - await self.push_error(error_msg=f"Error: {msg['error']}") - except json.JSONDecodeError: - logger.error(f"Invalid JSON message: {message}") - - @traced_tts - async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: - """Generate TTS audio from text using PlayHT's WebSocket API. - - Args: - text: The text to synthesize into speech. - context_id: The context ID for tracking audio frames. - - Yields: - Frame: Audio frames containing the synthesized speech. 
- """ - logger.debug(f"{self}: Generating TTS [{text}]") - - try: - # Reconnect if the websocket is closed - if not self._websocket or self._websocket.state is State.CLOSED: - await self._connect() - - if not self._context_id: - await self.start_ttfb_metrics() - yield TTSStartedFrame(context_id=context_id) - self._context_id = context_id - - tts_command = { - "text": text, - "voice": self._settings.voice, - "voice_engine": self._settings.voice_engine, - "output_format": self._settings.output_format, - "sample_rate": self.sample_rate, - "language": self._settings.language, - "speed": self._settings.speed, - "seed": self._settings.seed, - "request_id": self._context_id, - } - - try: - await self._get_websocket().send(json.dumps(tts_command)) - await self.start_tts_usage_metrics(text) - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - yield TTSStoppedFrame(context_id=context_id) - await self._disconnect() - await self._connect() - return - - # The actual audio frames will be handled in _receive_task_handler - yield None - - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - - -class PlayHTHttpTTSService(TTSService): - """PlayHT HTTP-based text-to-speech service. - - .. deprecated:: 0.0.88 - - This class is deprecated and will be removed in a future version. - PlayHT is shutting down their API on December 31st, 2025. - - Provides text-to-speech synthesis using PlayHT's HTTP API for simpler, - non-streaming synthesis. Suitable for use cases where streaming is not - required and simpler integration is preferred. - """ - - _settings: PlayHTTTSSettings - - class InputParams(BaseModel): - """Input parameters for PlayHT HTTP TTS configuration. - - Parameters: - language: Language for synthesis. Defaults to English. - speed: Speech speed multiplier. Defaults to 1.0. - seed: Random seed for voice consistency. 
- """ - - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 - seed: Optional[int] = None - - def __init__( - self, - *, - api_key: str, - user_id: str, - voice_url: str, - voice_engine: str = "Play3.0-mini", - protocol: Optional[str] = None, - output_format: str = "wav", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - **kwargs, - ): - """Initialize the PlayHT HTTP TTS service. - - Args: - api_key: PlayHT API key for authentication. - user_id: PlayHT user ID for authentication. - voice_url: URL of the voice to use for synthesis. - voice_engine: Voice engine to use. Defaults to "Play3.0-mini". - protocol: Protocol to use ("http" or "ws"). - - .. deprecated:: 0.0.80 - This parameter no longer has any effect and will be removed in a future version. - Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP. - - output_format: Audio output format. Defaults to "wav". - sample_rate: Audio sample rate. If None, uses default. - params: Additional input parameters for voice customization. - **kwargs: Additional arguments passed to parent TTSService. - """ - super().__init__(sample_rate=sample_rate, **kwargs) - - # Warn about deprecated protocol parameter if explicitly provided - if protocol: - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "The 'protocol' parameter is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "PlayHT is shutting down their API on December 31st, 2025. 
" - "'PlayHTHttpTTSService' is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - params = params or PlayHTHttpTTSService.InputParams() - - self._user_id = user_id - self._api_key = api_key - - # Check if voice_engine contains protocol information (backward compatibility) - if "-http" in voice_engine: - # Extract the base engine name - voice_engine = voice_engine.replace("-http", "") - elif "-ws" in voice_engine: - # Extract the base engine name - voice_engine = voice_engine.replace("-ws", "") - - self._settings = PlayHTTTSSettings( - model=voice_engine, - voice=voice_url, - language=self.language_to_service_language(params.language) - if params.language - else "english", - output_format=output_format, - voice_engine=voice_engine, - speed=params.speed, - seed=params.seed, - playht_sample_rate=0, - ) - self._sync_model_name_to_metrics() - - async def start(self, frame: StartFrame): - """Start the PlayHT HTTP TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - self._settings.playht_sample_rate = self.sample_rate - - def can_generate_metrics(self) -> bool: - """Check if this service can generate processing metrics. - - Returns: - True, as PlayHT HTTP service supports metrics generation. - """ - return True - - def language_to_service_language(self, language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT service language format. - - Args: - language: The language to convert. - - Returns: - The PlayHT-specific language code, or None if not supported. - """ - return language_to_playht_language(language) - - @traced_tts - async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: - """Generate TTS audio from text using PlayHT's HTTP API. - - Args: - text: The text to synthesize into speech. - context_id: The context ID for tracking audio frames. 
- - Yields: - Frame: Audio frames containing the synthesized speech. - """ - logger.debug(f"{self}: Generating TTS [{text}]") - - try: - await self.start_ttfb_metrics() - - # Prepare the request payload - payload = { - "text": text, - "voice": self._settings.voice, - "voice_engine": self._settings.voice_engine, - "output_format": self._settings.output_format, - "sample_rate": self.sample_rate, - "language": self._settings.language, - } - - # Add optional parameters if they exist - if self._settings.speed is not None: - payload["speed"] = self._settings.speed - if self._settings.seed is not None: - payload["seed"] = self._settings.seed - - headers = { - "Authorization": f"Bearer {self._api_key}", - "X-User-Id": self._user_id, - "Content-Type": "application/json", - "Accept": "*/*", - } - - await self.start_tts_usage_metrics(text) - - yield TTSStartedFrame(context_id=context_id) - - async with aiohttp.ClientSession() as session: - async with session.post( - "https://api.play.ht/api/v2/tts/stream", - headers=headers, - json=payload, - ) as response: - if response.status not in (200, 201): - error_text = await response.text() - raise Exception(f"PlayHT API error {response.status}: {error_text}") - - in_header = True - buffer = b"" - - CHUNK_SIZE = self.chunk_size - - async for chunk in response.content.iter_chunked(CHUNK_SIZE): - if len(chunk) == 0: - continue - - # Skip the RIFF header - if in_header: - buffer += chunk - if len(buffer) <= 36: - continue - else: - fh = io.BytesIO(buffer) - fh.seek(36) - (data, size) = struct.unpack("<4sI", fh.read(8)) - while data != b"data": - fh.read(size) - (data, size) = struct.unpack("<4sI", fh.read(8)) - # Extract audio data after header - audio_data = buffer[fh.tell() :] - if len(audio_data) > 0: - await self.stop_ttfb_metrics() - frame = TTSAudioRawFrame( - audio_data, self.sample_rate, 1, context_id=context_id - ) - yield frame - in_header = False - elif len(chunk) > 0: - await self.stop_ttfb_metrics() - frame = 
TTSAudioRawFrame( - chunk, self.sample_rate, 1, context_id=context_id - ) - yield frame - - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - finally: - await self.stop_ttfb_metrics() - yield TTSStoppedFrame(context_id=context_id) diff --git a/uv.lock b/uv.lock index bd2f64639..e2615b170 100644 --- a/uv.lock +++ b/uv.lock @@ -4550,9 +4550,6 @@ piper = [ { name = "piper-tts" }, { name = "requests" }, ] -playht = [ - { name = "websockets" }, -] resembleai = [ { name = "websockets" }, ] @@ -4722,7 +4719,6 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'lmnt'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'neuphonic'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'openai'" }, - { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'playht'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'resembleai'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" }, @@ -4763,7 +4759,7 @@ requires-dist = [ { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", 
"runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] [package.metadata.requires-dev] dev = [ From 27940d83a2e39a4de8becdd8e434cd6d62fa3001 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Feb 2026 09:41:23 -0500 Subject: [PATCH 105/189] Make it so that `AIService` is the exclusive "syncer" of model name to metrics. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only (rare) exception—where a service still needs to directly call `self._sync_model_name_to_metrics()`—is when the model name needs to be "pulled" from another field (or nested field) in settings up to settings.model on a settings update. This only occurs in Deepgram services, where we use the voice as the model name. This change has the side-effect of bringing model name to metrics for a number of services that were accidentally omitting it before. 
--- COMMUNITY_INTEGRATIONS.md | 13 +- src/pipecat/services/ai_service.py | 20 +-- src/pipecat/services/anthropic/llm.py | 52 ++++---- src/pipecat/services/assemblyai/stt.py | 14 +- src/pipecat/services/asyncai/tts.py | 53 ++++---- src/pipecat/services/aws/llm.py | 39 +++--- src/pipecat/services/aws/nova_sonic/llm.py | 36 ++--- src/pipecat/services/aws/stt.py | 20 +-- src/pipecat/services/aws/tts.py | 31 +++-- src/pipecat/services/azure/image.py | 15 ++- src/pipecat/services/azure/stt.py | 18 ++- src/pipecat/services/azure/tts.py | 60 ++++++--- src/pipecat/services/camb/tts.py | 28 ++-- src/pipecat/services/cartesia/stt.py | 24 ++-- src/pipecat/services/cartesia/tts.py | 69 +++++----- src/pipecat/services/deepgram/flux/stt.py | 28 ++-- src/pipecat/services/deepgram/stt.py | 15 ++- .../services/deepgram/stt_sagemaker.py | 21 +-- src/pipecat/services/deepgram/tts.py | 31 ++--- .../services/deepgram/tts_sagemaker.py | 13 +- src/pipecat/services/elevenlabs/stt.py | 50 ++++--- src/pipecat/services/elevenlabs/tts.py | 71 +++++----- src/pipecat/services/fal/image.py | 15 ++- src/pipecat/services/fal/stt.py | 22 +-- src/pipecat/services/fish/tts.py | 38 +++--- src/pipecat/services/gladia/stt.py | 54 ++++---- .../services/google/gemini_live/llm.py | 57 ++++---- src/pipecat/services/google/image.py | 19 ++- src/pipecat/services/google/llm.py | 36 ++--- src/pipecat/services/google/stt.py | 39 +++--- src/pipecat/services/google/tts.py | 86 ++++++------ src/pipecat/services/gradium/stt.py | 23 ++-- src/pipecat/services/gradium/tts.py | 16 +-- src/pipecat/services/grok/realtime/llm.py | 32 ++--- src/pipecat/services/groq/tts.py | 23 ++-- src/pipecat/services/hathora/stt.py | 17 ++- src/pipecat/services/hathora/tts.py | 18 ++- src/pipecat/services/hume/tts.py | 19 +-- src/pipecat/services/image_service.py | 14 +- src/pipecat/services/inworld/tts.py | 61 ++++----- src/pipecat/services/kokoro/tts.py | 19 +-- src/pipecat/services/llm_service.py | 16 ++- 
src/pipecat/services/lmnt/tts.py | 13 +- src/pipecat/services/minimax/tts.py | 44 +++--- src/pipecat/services/moondream/vision.py | 16 ++- src/pipecat/services/neuphonic/tts.py | 43 +++--- src/pipecat/services/nvidia/stt.py | 47 ++++--- src/pipecat/services/nvidia/tts.py | 20 +-- src/pipecat/services/openai/base_llm.py | 34 ++--- src/pipecat/services/openai/image.py | 15 ++- src/pipecat/services/openai/realtime/llm.py | 33 ++--- src/pipecat/services/openai/stt.py | 12 +- src/pipecat/services/openai/tts.py | 21 +-- .../services/openai_realtime_beta/openai.py | 33 ++--- src/pipecat/services/piper/tts.py | 13 +- src/pipecat/services/resembleai/tts.py | 16 +-- src/pipecat/services/rime/tts.py | 126 +++++++++--------- src/pipecat/services/sarvam/stt.py | 24 ++-- src/pipecat/services/sarvam/tts.py | 110 +++++++-------- src/pipecat/services/settings.py | 22 +++ src/pipecat/services/soniox/stt.py | 29 ++-- src/pipecat/services/speechmatics/stt.py | 38 +++--- src/pipecat/services/speechmatics/tts.py | 21 +-- src/pipecat/services/stt_service.py | 11 +- src/pipecat/services/tts_service.py | 11 +- src/pipecat/services/ultravox/llm.py | 28 ++-- src/pipecat/services/vision_service.py | 14 +- src/pipecat/services/whisper/base_stt.py | 23 ++-- src/pipecat/services/whisper/stt.py | 41 +++--- src/pipecat/services/xtts/tts.py | 16 ++- 70 files changed, 1200 insertions(+), 1019 deletions(-) diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index e11c79f31..ff8d08ea5 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -257,15 +257,16 @@ The service stores its current settings in `self._settings` and declares the typ ```python class MySTTService(STTService): - _settings: MySTTSettings def __init__(self, *, model: str, language: str, region: str, **kwargs): - super().__init__(**kwargs) - # Initial value must be provided for every field in self._settings - # before service is started - self._settings = MySTTSettings(model=model, language=language, 
region=region) - self._sync_model_name_to_metrics() + # An initial value should be provided for every settings field. + # This will be validated at service start. + # (If you track sample_rate, it can be a placeholder value like 0; see + # "Sample Rate Handling"). + super().__init__( + settings=MySTTSettings(model=model, language=language, region=region), **kwargs + ) ``` To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like: diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index f092c2b49..c4e45a417 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -35,14 +35,21 @@ class AIService(FrameProcessor): this base infrastructure. """ - def __init__(self, **kwargs): + def __init__(self, settings: ServiceSettings | None = None, **kwargs): """Initialize the AI service. Args: + settings: The runtime-updatable settings for the AI service. **kwargs: Additional arguments passed to the parent FrameProcessor. """ super().__init__(**kwargs) - self._settings: ServiceSettings = ServiceSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) + self._settings: ServiceSettings = ( + settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or ServiceSettings() + ) + self._sync_model_name_to_metrics() self._session_properties: Dict[str, Any] = {} self._tracing_enabled: bool = False self._tracing_context = None @@ -54,15 +61,12 @@ class AIService(FrameProcessor): of truth for it in `self._settings.model`. This method is just for syncing the model name to the metrics data. 
- TODO: as a next step we should make it so that service classes pass - model into `super().__init__` and `AIService` can be responsible for - syncing its initial value to metrics, just as it's responsible for - syncing any updates to its value to metrics via `_update_settings`. - Args: model: The name of the AI model to use. """ - self.set_core_metrics_data(MetricsData(processor=self.name, model=self._settings.model)) + self.set_core_metrics_data( + MetricsData(processor=self.name, model=self._settings.model or "") + ) async def start(self, frame: StartFrame): """Start the AI service. diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 047159515..03190ef99 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -232,37 +232,39 @@ class AnthropicLLMService(LLMService): retry_on_timeout: Whether to retry the request once if it times out. **kwargs: Additional arguments passed to parent LLMService. """ - super().__init__(**kwargs) params = params or AnthropicLLMService.InputParams() + + super().__init__( + settings=AnthropicLLMSettings( + model=model, + max_tokens=params.max_tokens, + enable_prompt_caching=( + params.enable_prompt_caching + if params.enable_prompt_caching is not None + else ( + params.enable_prompt_caching_beta + if params.enable_prompt_caching_beta is not None + else False + ) + ), + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) self._client = client or AsyncAnthropic( api_key=api_key ) # if the client is provided, use it and remove it, otherwise create a new one self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = AnthropicLLMSettings( 
- model=model, - max_tokens=params.max_tokens, - enable_prompt_caching=( - params.enable_prompt_caching - if params.enable_prompt_caching is not None - else ( - params.enable_prompt_caching_beta - if params.enable_prompt_caching_beta is not None - else False - ) - ), - temperature=params.temperature, - top_k=params.top_k, - top_p=params.top_p, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - thinking=params.thinking, - extra=params.extra if isinstance(params.extra, dict) else {}, - ) - self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate usage metrics. diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 44ae123b7..a89f5fe52 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -111,15 +111,17 @@ class AssemblyAISTTService(WebsocketSTTService): connection_params = self._configure_manual_turn_mode(connection_params) super().__init__( - sample_rate=connection_params.sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs + sample_rate=connection_params.sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=AssemblyAISTTSettings( + model=None, + language=language, + connection_params=connection_params, + ), + **kwargs, ) self._api_key = api_key - self._settings = AssemblyAISTTSettings( - model=None, - language=language, - connection_params=connection_params, - ) self._api_endpoint_base_url = api_endpoint_base_url self._vad_force_turn_endpoint = vad_force_turn_endpoint diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index f1f73b7ff..334f80d80 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -147,30 +147,29 @@ class AsyncAITTSService(AudioContextTTSService): aggregate_sentences: Whether to aggregate sentences within the TTSService. 
**kwargs: Additional arguments passed to the parent service. """ + params = params or AsyncAITTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, + settings=AsyncAITTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + ), **kwargs, ) - params = params or AsyncAITTSService.InputParams() - self._api_key = api_key self._api_version = version self._url = url - self._settings = AsyncAITTSSettings( - model=model, - voice=voice_id, - output_container=container, - output_encoding=encoding, - output_sample_rate=0, - language=self.language_to_service_language(params.language) - if params.language - else None, - ) - self._sync_model_name_to_metrics() self._receive_task = None self._keepalive_task = None @@ -501,24 +500,26 @@ class AsyncAIHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or AsyncAIHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=AsyncAITTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = url self._api_version = version - self._settings = AsyncAITTSSettings( - model=model, - voice=voice_id, - output_container=container, - output_encoding=encoding, - output_sample_rate=0, - language=self.language_to_service_language(params.language) - if params.language - else None, - ) - self._sync_model_name_to_metrics() self._session = aiohttp_session diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 34e869c69..540ac4a8e 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -797,10 +797,28 @@ class AWSBedrockLLMService(LLMService): retry_on_timeout: Whether to retry the request once if it times out. **kwargs: Additional arguments passed to parent LLMService. 
""" - super().__init__(**kwargs) - params = params or AWSBedrockLLMService.InputParams() + super().__init__( + settings=AWSBedrockLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + latency=params.latency, + additional_model_request_fields=params.additional_model_request_fields + if isinstance(params.additional_model_request_fields, dict) + else {}, + ), + **kwargs, + ) + # Initialize the AWS Bedrock client if not client_config: client_config = Config( @@ -822,23 +840,6 @@ class AWSBedrockLLMService(LLMService): self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = AWSBedrockLLMSettings( - model=model, - max_tokens=params.max_tokens, - temperature=params.temperature, - top_p=params.top_p, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - latency=params.latency, - additional_model_request_fields=params.additional_model_request_fields - if isinstance(params.additional_model_request_fields, dict) - else {}, - ) - self._sync_model_name_to_metrics() logger.info(f"Using AWS Bedrock model: {model}") diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index e51a1842c..29612e593 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -254,28 +254,30 @@ class AWSNovaSonicLLMService(LLMService): **kwargs: Additional arguments passed to the parent LLMService. 
""" - super().__init__(**kwargs) + params = params or Params() + + super().__init__( + settings=AWSNovaSonicLLMSettings( + model=model, + voice_id=voice_id, + temperature=params.temperature, + max_tokens=params.max_tokens, + top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + endpointing_sensitivity=params.endpointing_sensitivity, + ), + **kwargs, + ) self._secret_access_key = secret_access_key self._access_key_id = access_key_id self._session_token = session_token self._region = region self._client: Optional[BedrockRuntimeClient] = None - params = params or Params() - self._settings = AWSNovaSonicLLMSettings( - model=model, - voice_id=voice_id, - temperature=params.temperature, - max_tokens=params.max_tokens, - top_p=params.top_p, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - endpointing_sensitivity=params.endpointing_sensitivity, - ) - self._sync_model_name_to_metrics() # Audio I/O config (hardware settings, not runtime-tunable) self._input_sample_rate = params.input_sample_rate diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index 1d8ae84f5..7c3fb398e 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -99,15 +99,17 @@ class AWSTranscribeSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService class. 
""" - super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - - self._settings = AWSTranscribeSTTSettings( - language=self.language_to_service_language(language) or "en-US", - sample_rate=sample_rate, - media_encoding="linear16", - number_of_channels=1, - show_speaker_label=False, - enable_channel_identification=False, + super().__init__( + ttfs_p99_latency=ttfs_p99_latency, + settings=AWSTranscribeSTTSettings( + language=self.language_to_service_language(language) or "en-US", + sample_rate=sample_rate, + media_encoding="linear16", + number_of_channels=1, + show_speaker_label=False, + enable_channel_identification=False, + ), + **kwargs, ) # Validate sample rate - AWS Transcribe only supports 8000 Hz or 16000 Hz diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 8b06ad7e9..017477a7a 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -195,10 +195,25 @@ class AWSPollyTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to parent TTSService class. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or AWSPollyTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=AWSPollyTTSSettings( + model=None, + voice=voice_id, + engine=params.engine, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + volume=params.volume, + lexicon_names=params.lexicon_names, + ), + **kwargs, + ) + # Get credentials from environment variables if not provided self._aws_params = { "aws_access_key_id": aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), @@ -208,18 +223,6 @@ class AWSPollyTTSService(TTSService): } self._aws_session = aioboto3.Session() - self._settings = AWSPollyTTSSettings( - model=None, - voice=voice_id, - engine=params.engine, - language=self.language_to_service_language(params.language) - if params.language - else "en-US", - pitch=params.pitch, - rate=params.rate, - volume=params.volume, - lexicon_names=params.lexicon_names, - ) self._resampler = create_stream_resampler() diff --git a/src/pipecat/services/azure/image.py b/src/pipecat/services/azure/image.py index f5ce4a9f1..66cc28504 100644 --- a/src/pipecat/services/azure/image.py +++ b/src/pipecat/services/azure/image.py @@ -12,6 +12,7 @@ using REST endpoints for creating images from text prompts. import asyncio import io +from dataclasses import dataclass from typing import AsyncGenerator import aiohttp @@ -19,6 +20,16 @@ from PIL import Image from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings + + +@dataclass +class AzureImageGenSettings(ImageGenSettings): + """Settings for the Azure image generation service. + + Parameters: + model: Azure image generation model identifier. 
+ """ class AzureImageGenServiceREST(ImageGenService): @@ -49,13 +60,11 @@ class AzureImageGenServiceREST(ImageGenService): aiohttp_session: Shared aiohttp session for HTTP requests. api_version: Azure API version string. Defaults to "2023-06-01-preview". """ - super().__init__() + super().__init__(settings=AzureImageGenSettings(model=model)) self._api_key = api_key self._azure_endpoint = endpoint self._api_version = api_version - self._settings.model = model - self._sync_model_name_to_metrics() self._image_size = image_size self._aiohttp_session = aiohttp_session diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 096f236a6..c1a38ec0c 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -96,7 +96,17 @@ class AzureSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService. """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=AzureSTTSettings( + model=None, + region=region, + language=language_to_azure_language(language), + sample_rate=sample_rate, + ), + **kwargs, + ) self._speech_config = SpeechConfig( subscription=api_key, @@ -109,12 +119,6 @@ class AzureSTTService(STTService): self._audio_stream = None self._speech_recognizer = None - self._settings = AzureSTTSettings( - model=None, - region=region, - language=language_to_azure_language(language), - sample_rate=sample_rate, - ) def can_generate_metrics(self) -> bool: """Check if this service can generate performance metrics. 
diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 3a176055d..b3534b28e 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -141,7 +141,6 @@ class AzureBaseTTSService: api_key: str, region: str, voice: str = "en-US-SaraNeural", - params: Optional[InputParams] = None, ): """Initialize Azure-specific configuration. @@ -151,25 +150,7 @@ class AzureBaseTTSService: api_key: Azure Cognitive Services subscription key. region: Azure region identifier (e.g., "eastus", "westus2"). voice: Voice name to use for synthesis. Defaults to "en-US-SaraNeural". - params: Voice and synthesis parameters configuration. """ - params = params or AzureBaseTTSService.InputParams() - - self._settings = AzureTTSSettings( - model=None, - emphasis=params.emphasis, - language=self.language_to_service_language(params.language) - if params.language - else "en-US", - pitch=params.pitch, - rate=params.rate, - role=params.role, - style=params.style, - style_degree=params.style_degree, - voice=voice, - volume=params.volume, - ) - self._api_key = api_key self._region = region self._speech_synthesizer = None @@ -289,6 +270,8 @@ class AzureTTSService(TTSService, AzureBaseTTSService): aggregate_sentences: Whether to aggregate sentences before synthesis. **kwargs: Additional arguments passed to the parent TTSService. 
""" + params = params or AzureBaseTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, # We'll push text frames based on word timestamps @@ -296,11 +279,25 @@ class AzureTTSService(TTSService, AzureBaseTTSService): pause_frame_processing=True, supports_word_timestamps=True, sample_rate=sample_rate, + settings=AzureTTSSettings( + model=None, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + role=params.role, + style=params.style, + style_degree=params.style_degree, + voice=voice, + volume=params.volume, + ), **kwargs, ) # Initialize Azure-specific functionality from mixin - self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) + self._init_azure_base(api_key=api_key, region=region, voice=voice) self._speech_config = None self._speech_synthesizer = None @@ -734,10 +731,29 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): params: Voice and synthesis parameters configuration. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + params = params or AzureBaseTTSService.InputParams() + + super().__init__( + sample_rate=sample_rate, + settings=AzureTTSSettings( + model=None, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + role=params.role, + style=params.style, + style_degree=params.style_degree, + voice=voice, + volume=params.volume, + ), + **kwargs, + ) # Initialize Azure-specific functionality from mixin - self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) + self._init_azure_base(api_key=api_key, region=region, voice=voice) self._speech_config = None self._speech_synthesizer = None diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index a2887df28..75b299569 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -213,11 +213,6 @@ class CambTTSService(TTSService): params: Additional voice parameters. If None, uses defaults. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - - self._api_key = api_key - self._timeout = timeout - params = params or CambTTSService.InputParams() # Warn if sample rate doesn't match model's supported rate @@ -227,16 +222,23 @@ class CambTTSService(TTSService): f"sample rate. Current rate of {sample_rate}Hz may cause issues." 
) - # Build settings - self._settings = CambTTSSettings( - model=model, - voice=voice_id, - language=( - self.language_to_service_language(params.language) if params.language else "en-us" + super().__init__( + sample_rate=sample_rate, + settings=CambTTSSettings( + model=model, + voice=voice_id, + language=( + self.language_to_service_language(params.language) + if params.language + else "en-us" + ), + user_instructions=params.user_instructions, ), - user_instructions=params.user_instructions, + **kwargs, ) - self._sync_model_name_to_metrics() + + self._api_key = api_key + self._timeout = timeout self._client = None diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 20ff04963..526fc9116 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -173,13 +173,6 @@ class CartesiaSTTService(WebsocketSTTService): **kwargs: Additional arguments passed to parent STTService. """ sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__( - sample_rate=sample_rate, - ttfs_p99_latency=ttfs_p99_latency, - keepalive_timeout=120, - keepalive_interval=30, - **kwargs, - ) default_options = CartesiaLiveOptions( model="ink-whisper", @@ -196,12 +189,19 @@ class CartesiaSTTService(WebsocketSTTService): k: v for k, v in merged_options.items() if not isinstance(v, str) or v != "None" } - self._settings = CartesiaSTTSettings( - model=merged_options["model"], - language=merged_options.get("language"), - encoding=merged_options.get("encoding", "pcm_s16le"), + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + keepalive_timeout=120, + keepalive_interval=30, + settings=CartesiaSTTSettings( + model=merged_options["model"], + language=merged_options.get("language"), + encoding=merged_options.get("encoding", "pcm_s16le"), + ), + **kwargs, ) - self._sync_model_name_to_metrics() + self._api_key = api_key self._base_url = base_url or 
"api.cartesia.ai" self._receive_task = None diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index f45e7c54f..edf838e59 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -305,6 +305,8 @@ class CartesiaTTSService(AudioContextTTSService): # if we're interrupted. Cartesia gives us word-by-word timestamps. We # can use those to generate text frames ourselves aligned with the # playout timing of the audio! + params = params or CartesiaTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, @@ -312,6 +314,20 @@ class CartesiaTTSService(AudioContextTTSService): supports_word_timestamps=True, sample_rate=sample_rate, text_aggregator=text_aggregator, + settings=CartesiaTTSSettings( + model=model, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + voice=voice_id, + ), **kwargs, ) @@ -323,26 +339,9 @@ class CartesiaTTSService(AudioContextTTSService): # and insert these tags for the purpose of the TTS service alone. 
self._text_aggregator = SkipTagsAggregator([("", "")]) - params = params or CartesiaTTSService.InputParams() - self._api_key = api_key self._cartesia_version = cartesia_version self._url = url - self._settings = CartesiaTTSSettings( - model=model, - output_container=container, - output_encoding=encoding, - output_sample_rate=0, - language=self.language_to_service_language(params.language) - if params.language - else None, - speed=params.speed, - emotion=params.emotion, - generation_config=params.generation_config, - pronunciation_dict_id=params.pronunciation_dict_id, - voice=voice_id, - ) - self._sync_model_name_to_metrics() self._receive_task = None @@ -727,28 +726,30 @@ class CartesiaHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or CartesiaHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=CartesiaTTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = base_url self._cartesia_version = cartesia_version - self._settings = CartesiaTTSSettings( - model=model, - voice=voice_id, - output_container=container, - output_encoding=encoding, - output_sample_rate=0, - language=self.language_to_service_language(params.language) - if params.language - else None, - speed=params.speed, - emotion=params.emotion, - generation_config=params.generation_config, - pronunciation_dict_id=params.pronunciation_dict_id, - ) - self._sync_model_name_to_metrics() self._client = AsyncCartesia( 
api_key=api_key, diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index f0018d5c8..d509b267e 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -207,26 +207,24 @@ class DeepgramFluxSTTService(WebsocketSTTService): # was never destroyed. # So we can keep it here as false, because inside the method send_with_retry, it will # already try to reconnect if needed. + params = params or DeepgramFluxSTTService.InputParams() super().__init__( sample_rate=sample_rate, reconnect_on_error=False, + settings=DeepgramFluxSTTSettings( + model=model, + language=Language.EN, + encoding=flux_encoding, + eager_eot_threshold=params.eager_eot_threshold, + eot_threshold=params.eot_threshold, + eot_timeout_ms=params.eot_timeout_ms, + keyterm=params.keyterm or [], + mip_opt_out=params.mip_opt_out, + tag=params.tag or [], + min_confidence=params.min_confidence, + ), **kwargs, ) - - params = params or DeepgramFluxSTTService.InputParams() - self._settings = DeepgramFluxSTTSettings( - model=model, - language=Language.EN, - encoding=flux_encoding, - eager_eot_threshold=params.eager_eot_threshold, - eot_threshold=params.eot_threshold, - eot_timeout_ms=params.eot_timeout_ms, - keyterm=params.keyterm or [], - mip_opt_out=params.mip_opt_out, - tag=params.tag or [], - min_confidence=params.min_confidence, - ) - self._sync_model_name_to_metrics() self._api_key = api_key self._url = url self._should_interrupt = should_interrupt diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 0792ea3c9..aa5c2ce8d 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -117,7 +117,6 @@ class DeepgramSTTService(STTService): The `vad_events` option in LiveOptions is deprecated as of version 0.0.99 and will be removed in a future version. Please use the Silero VAD instead. 
""" sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) if url: import warnings @@ -155,12 +154,16 @@ class DeepgramSTTService(STTService): merged_options["language"] = merged_options["language"].value merged_live_options = LiveOptions(**merged_options) - self._settings = DeepgramSTTSettings( - model=merged_options.get("model"), - language=merged_options.get("language"), - live_options=merged_live_options, + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=DeepgramSTTSettings( + model=merged_options.get("model"), + language=merged_options.get("language"), + live_options=merged_live_options, + ), + **kwargs, ) - self._sync_model_name_to_metrics() self._addons = addons self._should_interrupt = should_interrupt diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 3820f8d84..bc5eebe37 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -115,10 +115,6 @@ class DeepgramSageMakerSTTService(STTService): **kwargs: Additional arguments passed to the parent STTService. 
""" sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - - self._endpoint_name = endpoint_name - self._region = region # Create default options similar to DeepgramSTTService default_options = LiveOptions( @@ -144,12 +140,19 @@ class DeepgramSageMakerSTTService(STTService): merged_options["language"] = merged_options["language"].value merged_live_options = LiveOptions(**merged_options) - self._settings = DeepgramSageMakerSTTSettings( - model=merged_options.get("model"), - language=merged_options.get("language"), - live_options=merged_live_options, + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=DeepgramSageMakerSTTSettings( + model=merged_options.get("model"), + language=merged_options.get("language"), + live_options=merged_live_options, + ), + **kwargs, ) - self._sync_model_name_to_metrics() + + self._endpoint_name = endpoint_name + self._region = region self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index b3973bba2..c05b90868 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -101,18 +101,17 @@ class DeepgramTTSService(WebsocketTTSService): pause_frame_processing=True, push_stop_frames=True, append_trailing_space=True, + settings=DeepgramTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), **kwargs, ) self._api_key = api_key self._base_url = base_url - self._settings = DeepgramTTSSettings( - model=voice, - voice=voice, - language=None, - encoding=encoding, - ) - self._sync_model_name_to_metrics() self._receive_task = None self._context_id: Optional[str] = None @@ -394,18 +393,20 @@ class DeepgramHttpTTSService(TTSService): encoding: Audio encoding format. Defaults to "linear16". 
**kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__( + sample_rate=sample_rate, + settings=DeepgramTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), + **kwargs, + ) self._api_key = api_key self._session = aiohttp_session self._base_url = base_url - self._settings = DeepgramTTSSettings( - model=voice, - voice=voice, - language=None, - encoding=encoding, - ) - self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. diff --git a/src/pipecat/services/deepgram/tts_sagemaker.py b/src/pipecat/services/deepgram/tts_sagemaker.py index 798a62bf8..b583ce76c 100644 --- a/src/pipecat/services/deepgram/tts_sagemaker.py +++ b/src/pipecat/services/deepgram/tts_sagemaker.py @@ -99,18 +99,17 @@ class DeepgramSageMakerTTSService(TTSService): push_stop_frames=True, pause_frame_processing=True, append_trailing_space=True, + settings=DeepgramSageMakerTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), **kwargs, ) self._endpoint_name = endpoint_name self._region = region - self._settings = DeepgramSageMakerTTSSettings( - model=voice, - voice=voice, - language=None, - encoding=encoding, - ) - self._sync_model_name_to_metrics() self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 7a821304d..c3f4300f4 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -261,28 +261,26 @@ class ElevenLabsSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. 
""" + params = params or ElevenLabsSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=ElevenLabsSTTSettings( + model=model, + language=self.language_to_service_language(params.language) + if params.language + else "eng", + tag_audio_events=params.tag_audio_events, + ), **kwargs, ) - params = params or ElevenLabsSTTService.InputParams() - self._api_key = api_key self._base_url = base_url self._session = aiohttp_session self._model_id = model - self._settings = ElevenLabsSTTSettings( - model=model, - language=self.language_to_service_language(params.language) - if params.language - else "eng", - tag_audio_events=params.tag_audio_events, - ) - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -500,16 +498,28 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to WebsocketSTTService. 
""" + params = params or ElevenLabsRealtimeSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=10, keepalive_interval=5, + settings=ElevenLabsRealtimeSTTSettings( + model=model, + language=params.language_code, + commit_strategy=params.commit_strategy, + vad_silence_threshold_secs=params.vad_silence_threshold_secs, + vad_threshold=params.vad_threshold, + min_speech_duration_ms=params.min_speech_duration_ms, + min_silence_duration_ms=params.min_silence_duration_ms, + include_timestamps=params.include_timestamps, + enable_logging=params.enable_logging, + include_language_detection=params.include_language_detection, + ), **kwargs, ) - params = params or ElevenLabsRealtimeSTTService.InputParams() - self._api_key = api_key self._base_url = base_url self._model_id = model @@ -519,20 +529,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): self._connected_event = asyncio.Event() self._connected_event.set() - self._settings = ElevenLabsRealtimeSTTSettings( - model=model, - language=params.language_code, - commit_strategy=params.commit_strategy, - vad_silence_threshold_secs=params.vad_silence_threshold_secs, - vad_threshold=params.vad_threshold, - min_speech_duration_ms=params.min_speech_duration_ms, - min_silence_duration_ms=params.min_silence_duration_ms, - include_timestamps=params.include_timestamps, - enable_logging=params.enable_logging, - include_language_detection=params.include_language_detection, - ) - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. 
diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 25e1aa5dd..c68d005f1 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -394,6 +394,8 @@ class ElevenLabsTTSService(AudioContextTTSService): # Finally, ElevenLabs doesn't provide information on when the bot stops # speaking for a while, so we want the parent class to send TTSStopFrame # after a short period not receiving any audio. + params = params or ElevenLabsTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, @@ -401,30 +403,27 @@ class ElevenLabsTTSService(AudioContextTTSService): pause_frame_processing=True, supports_word_timestamps=True, sample_rate=sample_rate, + settings=ElevenLabsTTSSettings( + model=model, + voice=voice_id, + language=( + self.language_to_service_language(params.language) if params.language else None + ), + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + auto_mode=str(params.auto_mode).lower(), + enable_ssml_parsing=params.enable_ssml_parsing, + enable_logging=params.enable_logging, + apply_text_normalization=params.apply_text_normalization, + ), **kwargs, ) - params = params or ElevenLabsTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings = ElevenLabsTTSSettings( - model=model, - voice=voice_id, - language=( - self.language_to_service_language(params.language) if params.language else None - ), - stability=params.stability, - similarity_boost=params.similarity_boost, - style=params.style, - use_speaker_boost=params.use_speaker_boost, - speed=params.speed, - auto_mode=str(params.auto_mode).lower(), - enable_ssml_parsing=params.enable_ssml_parsing, - enable_logging=params.enable_logging, - apply_text_normalization=params.apply_text_normalization, - ) - self._sync_model_name_to_metrics() 
self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() @@ -910,37 +909,35 @@ class ElevenLabsHttpTTSService(TTSService): aggregate_sentences: Whether to aggregate sentences within the TTSService. **kwargs: Additional arguments passed to the parent service. """ + params = params or ElevenLabsHttpTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, supports_word_timestamps=True, sample_rate=sample_rate, + settings=ElevenLabsHttpTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(params.language) + if params.language + else None, + optimize_streaming_latency=params.optimize_streaming_latency, + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + apply_text_normalization=params.apply_text_normalization, + ), **kwargs, ) - params = params or ElevenLabsHttpTTSService.InputParams() - self._api_key = api_key self._base_url = base_url - self._params = params self._session = aiohttp_session - self._settings = ElevenLabsHttpTTSSettings( - model=model, - voice=voice_id, - language=self.language_to_service_language(params.language) - if params.language - else None, - optimize_streaming_latency=params.optimize_streaming_latency, - stability=params.stability, - similarity_boost=params.similarity_boost, - style=params.style, - use_speaker_boost=params.use_speaker_boost, - speed=params.speed, - apply_text_normalization=params.apply_text_normalization, - ) - self._sync_model_name_to_metrics() self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators diff --git a/src/pipecat/services/fal/image.py b/src/pipecat/services/fal/image.py index fd9d9a22d..c16d31b43 100644 --- 
a/src/pipecat/services/fal/image.py +++ b/src/pipecat/services/fal/image.py @@ -13,6 +13,7 @@ for creating images from text prompts using various AI models. import asyncio import io import os +from dataclasses import dataclass from typing import AsyncGenerator, Dict, Optional, Union import aiohttp @@ -22,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings try: import fal_client @@ -31,6 +33,15 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class FalImageGenSettings(ImageGenSettings): + """Settings for the Fal image generation service. + + Parameters: + model: Fal.ai model identifier. + """ + + class FalImageGenService(ImageGenService): """Fal's image generation service. @@ -77,9 +88,7 @@ class FalImageGenService(ImageGenService): key: Optional API key for Fal.ai. If provided, sets FAL_KEY environment variable. **kwargs: Additional arguments passed to parent ImageGenService. """ - super().__init__(**kwargs) - self._settings.model = model - self._sync_model_name_to_metrics() + super().__init__(settings=FalImageGenSettings(model=model), **kwargs) self._params = params self._aiohttp_session = aiohttp_session if key: diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 923c3c2ea..bf70c1c2a 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -207,14 +207,23 @@ class FalSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. 
""" + params = params or FalSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=FalSTTSettings( + model=None, + language=self.language_to_service_language(params.language) + if params.language + else "en", + task=params.task, + chunk_level=params.chunk_level, + version=params.version, + ), **kwargs, ) - params = params or FalSTTService.InputParams() - if api_key: os.environ["FAL_KEY"] = api_key elif "FAL_KEY" not in os.environ: @@ -223,15 +232,6 @@ class FalSTTService(SegmentedSTTService): ) self._fal_client = fal_client.AsyncClient(key=api_key or os.getenv("FAL_KEY")) - self._settings = FalSTTSettings( - model=None, - language=self.language_to_service_language(params.language) - if params.language - else "en", - task=params.task, - chunk_level=params.chunk_level, - version=params.version, - ) def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 1927b6cac..9f9d753de 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -138,13 +138,6 @@ class FishAudioTTSService(InterruptibleTTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent service. 
""" - super().__init__( - push_stop_frames=True, - pause_frame_processing=True, - sample_rate=sample_rate, - **kwargs, - ) - params = params or FishAudioTTSService.InputParams() # Validation for model and reference_id parameters @@ -169,25 +162,30 @@ class FishAudioTTSService(InterruptibleTTSService): ) reference_id = model + super().__init__( + push_stop_frames=True, + pause_frame_processing=True, + sample_rate=sample_rate, + settings=FishAudioTTSSettings( + model=model_id, + voice=reference_id, + fish_sample_rate=0, + latency=params.latency, + format=output_format, + normalize=params.normalize, + prosody_speed=params.prosody_speed, + prosody_volume=params.prosody_volume, + reference_id=reference_id, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = "wss://api.fish.audio/v1/tts/live" self._websocket = None self._receive_task = None self._request_id = None - self._settings = FishAudioTTSSettings( - model=model_id, - voice=reference_id, - fish_sample_rate=0, - latency=params.latency, - format=output_format, - normalize=params.normalize, - prosody_speed=params.prosody_speed, - prosody_volume=params.prosody_volume, - reference_id=reference_id, - ) - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index c1ce02d87..045a56613 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -278,14 +278,6 @@ class GladiaSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the STTService parent class. 
""" - super().__init__( - sample_rate=sample_rate, - ttfs_p99_latency=ttfs_p99_latency, - keepalive_timeout=20, - keepalive_interval=5, - **kwargs, - ) - params = params or GladiaInputParams() if params.language is not None: @@ -308,11 +300,6 @@ class GladiaSTTService(WebsocketSTTService): stacklevel=2, ) - self._api_key = api_key - self._region = region - self._url = url - self._receive_task = None - # Resolve deprecated language → language_config at init time language_config = params.language_config if not language_config and params.language: @@ -320,22 +307,33 @@ class GladiaSTTService(WebsocketSTTService): if language_code: language_config = LanguageConfig(languages=[language_code], code_switching=False) - self._settings = GladiaSTTSettings( - model=model, - language=None, - encoding=params.encoding, - bit_depth=params.bit_depth, - channels=params.channels, - custom_metadata=params.custom_metadata, - endpointing=params.endpointing, - maximum_duration_without_endpointing=params.maximum_duration_without_endpointing, - language_config=language_config, - pre_processing=params.pre_processing, - realtime_processing=params.realtime_processing, - messages_config=params.messages_config, - enable_vad=params.enable_vad, + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + keepalive_timeout=20, + keepalive_interval=5, + settings=GladiaSTTSettings( + model=model, + language=None, + encoding=params.encoding, + bit_depth=params.bit_depth, + channels=params.channels, + custom_metadata=params.custom_metadata, + endpointing=params.endpointing, + maximum_duration_without_endpointing=params.maximum_duration_without_endpointing, + language_config=language_config, + pre_processing=params.pre_processing, + realtime_processing=params.realtime_processing, + messages_config=params.messages_config, + enable_vad=params.enable_vad, + ), + **kwargs, ) - self._sync_model_name_to_metrics() + + self._api_key = api_key + self._region = region + self._url = url + 
self._receive_task = None # Session management self._session_url = None diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 037b23bb3..d06f941c7 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -695,10 +695,38 @@ class GeminiLiveLLMService(LLMService): stacklevel=2, ) - super().__init__(base_url=base_url, **kwargs) - params = params or InputParams() + super().__init__( + base_url=base_url, + settings=GeminiLiveLLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + max_tokens=params.max_tokens, + presence_penalty=params.presence_penalty, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + modalities=params.modalities, + language=language_to_gemini_language(params.language) + if params.language + else "en-US", + media_resolution=params.media_resolution, + vad=params.vad, + context_window_compression=params.context_window_compression.model_dump() + if params.context_window_compression + else {}, + thinking=params.thinking or {}, + enable_affective_dialog=params.enable_affective_dialog or False, + proactivity=params.proactivity or {}, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) + self._last_sent_time = 0 self._base_url = base_url self._voice_id = voice_id @@ -742,31 +770,6 @@ class GeminiLiveLLMService(LLMService): self._consecutive_failures = 0 self._connection_start_time = None - self._settings = GeminiLiveLLMSettings( - model=model, - frequency_penalty=params.frequency_penalty, - max_tokens=params.max_tokens, - presence_penalty=params.presence_penalty, - temperature=params.temperature, - top_k=params.top_k, - top_p=params.top_p, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - modalities=params.modalities, - 
language=self._language_code, - media_resolution=params.media_resolution, - vad=params.vad, - context_window_compression=params.context_window_compression.model_dump() - if params.context_window_compression - else {}, - thinking=params.thinking or {}, - enable_affective_dialog=params.enable_affective_dialog or False, - proactivity=params.proactivity or {}, - extra=params.extra if isinstance(params.extra, dict) else {}, - ) - self._sync_model_name_to_metrics() - self._file_api_base_url = file_api_base_url self._file_api: Optional[GeminiFileAPI] = None diff --git a/src/pipecat/services/google/image.py b/src/pipecat/services/google/image.py index f03b1da63..e69faf65e 100644 --- a/src/pipecat/services/google/image.py +++ b/src/pipecat/services/google/image.py @@ -16,6 +16,7 @@ import os # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" +from dataclasses import dataclass from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -25,6 +26,7 @@ from pydantic import BaseModel, Field from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.google.utils import update_google_client_http_options from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings try: from google import genai @@ -35,6 +37,15 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class GoogleImageGenSettings(ImageGenSettings): + """Settings for the Google image generation service. + + Parameters: + model: Google Imagen model identifier. + """ + + class GoogleImageGenService(ImageGenService): """Google AI image generation service using Imagen models. @@ -72,17 +83,15 @@ class GoogleImageGenService(ImageGenService): http_options: HTTP options for the client. **kwargs: Additional arguments passed to the parent ImageGenService. 
""" - super().__init__(**kwargs) - self._params = params or GoogleImageGenService.InputParams() + params = params or GoogleImageGenService.InputParams() + super().__init__(settings=GoogleImageGenSettings(model=params.model), **kwargs) + self._params = params # Add client header http_options = update_google_client_http_options(http_options) self._client = genai.Client(api_key=api_key, http_options=http_options) - self._settings.model = self._params.model - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 1c6f56669..37ccfae9a 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -793,29 +793,29 @@ class GoogleLLMService(LLMService): http_options: HTTP options for the client. **kwargs: Additional arguments passed to parent class. """ - super().__init__(**kwargs) - params = params or GoogleLLMService.InputParams() + super().__init__( + settings=GoogleLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) + self._api_key = api_key self._system_instruction = system_instruction self._http_options = update_google_client_http_options(http_options) - - self._settings = GoogleLLMSettings( - model=model, - max_tokens=params.max_tokens, - temperature=params.temperature, - top_k=params.top_k, - top_p=params.top_p, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - thinking=params.thinking, - extra=params.extra if isinstance(params.extra, dict) else {}, 
- ) - self._sync_model_name_to_metrics() self._tools = tools self._tool_config = tool_config diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index ac3afa7a3..95d91d462 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -499,10 +499,29 @@ class GoogleSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or GoogleSTTService.InputParams() + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=GoogleSTTSettings( + language=None, + languages=list(params.language_list), + language_codes=None, + model=params.model, + use_separate_recognition_per_channel=params.use_separate_recognition_per_channel, + enable_automatic_punctuation=params.enable_automatic_punctuation, + enable_spoken_punctuation=params.enable_spoken_punctuation, + enable_spoken_emojis=params.enable_spoken_emojis, + profanity_filter=params.profanity_filter, + enable_word_time_offsets=params.enable_word_time_offsets, + enable_word_confidence=params.enable_word_confidence, + enable_interim_results=params.enable_interim_results, + enable_voice_activity_events=params.enable_voice_activity_events, + ), + **kwargs, + ) + self._location = location self._stream = None self._config = None @@ -553,22 +572,6 @@ class GoogleSTTService(STTService): self._client = speech_v2.SpeechAsyncClient(credentials=creds, client_options=client_options) - self._settings = GoogleSTTSettings( - language=None, - languages=list(params.language_list), - language_codes=None, - model=params.model, - use_separate_recognition_per_channel=params.use_separate_recognition_per_channel, - enable_automatic_punctuation=params.enable_automatic_punctuation, - enable_spoken_punctuation=params.enable_spoken_punctuation, - 
enable_spoken_emojis=params.enable_spoken_emojis, - profanity_filter=params.profanity_filter, - enable_word_time_offsets=params.enable_word_time_offsets, - enable_word_confidence=params.enable_word_confidence, - enable_interim_results=params.enable_interim_results, - enable_voice_activity_events=params.enable_voice_activity_events, - ) - def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 3416ee6d4..80c71b10f 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -602,25 +602,28 @@ class GoogleHttpTTSService(TTSService): params: Voice customization parameters including pitch, rate, volume, etc. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GoogleHttpTTSService.InputParams() - self._location = location - self._settings = GoogleHttpTTSSettings( - model=None, - pitch=params.pitch, - rate=params.rate, - speaking_rate=params.speaking_rate, - volume=params.volume, - emphasis=params.emphasis, - language=self.language_to_service_language(params.language) - if params.language - else "en-US", - gender=params.gender, - google_style=params.google_style, - voice=voice_id, + super().__init__( + sample_rate=sample_rate, + settings=GoogleHttpTTSSettings( + model=None, + pitch=params.pitch, + rate=params.rate, + speaking_rate=params.speaking_rate, + volume=params.volume, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + gender=params.gender, + google_style=params.google_style, + voice=voice_id, + ), + **kwargs, ) + + self._location = location self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -1016,19 +1019,22 @@ class GoogleTTSService(GoogleBaseTTSService): params: Language configuration 
parameters. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GoogleTTSService.InputParams() - self._location = location - self._settings = GoogleStreamTTSSettings( - model=None, - language=self.language_to_service_language(params.language) - if params.language - else "en-US", - speaking_rate=params.speaking_rate, - voice=voice_id, + super().__init__( + sample_rate=sample_rate, + settings=GoogleStreamTTSSettings( + model=None, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + speaking_rate=params.speaking_rate, + voice=voice_id, + ), + **kwargs, ) + + self._location = location self._voice_cloning_key = voice_cloning_key self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path @@ -1222,26 +1228,28 @@ class GeminiTTSService(GoogleBaseTTSService): f"Google TTS only supports {self.GOOGLE_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GeminiTTSService.InputParams() if voice_id not in self.AVAILABLE_VOICES: logger.warning(f"Voice '{voice_id}' not in known voices list. 
Using anyway.") - self._location = location - self._model = model - self._settings = GeminiTTSSettings( - model=None, - language=self.language_to_service_language(params.language) - if params.language - else "en-US", - prompt=params.prompt, - multi_speaker=params.multi_speaker, - speaker_configs=params.speaker_configs, - voice=voice_id, + super().__init__( + sample_rate=sample_rate, + settings=GeminiTTSSettings( + model=None, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + prompt=params.prompt, + multi_speaker=params.multi_speaker, + speaker_configs=params.speaker_configs, + voice=voice_id, + ), + **kwargs, ) + self._location = location + self._model = model self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 4ec7bf6ff..ac35c6e52 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -129,8 +129,6 @@ class GradiumSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService class. 
""" - super().__init__(sample_rate=SAMPLE_RATE, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - if json_config is not None: import warnings @@ -140,19 +138,24 @@ class GradiumSTTService(WebsocketSTTService): stacklevel=2, ) + params = params or GradiumSTTService.InputParams() + + super().__init__( + sample_rate=SAMPLE_RATE, + ttfs_p99_latency=ttfs_p99_latency, + settings=GradiumSTTSettings( + model=None, + language=params.language, + delay_in_frames=params.delay_in_frames or None, + ), + **kwargs, + ) + self._api_key = api_key self._api_endpoint_base_url = api_endpoint_base_url self._websocket = None self._json_config = json_config - params = params or GradiumSTTService.InputParams() - - self._settings = GradiumSTTSettings( - model=None, - language=params.language, - delay_in_frames=params.delay_in_frames or None, - ) - self._receive_task = None self._audio_buffer = bytearray() diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index ee6e6821e..c8a83a7f2 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -85,27 +85,27 @@ class GradiumTTSService(AudioContextTTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent class. 
""" + params = params or GradiumTTSService.InputParams() + super().__init__( push_stop_frames=True, push_text_frames=False, pause_frame_processing=True, supports_word_timestamps=True, sample_rate=SAMPLE_RATE, + settings=GradiumTTSSettings( + model=model, + voice=voice_id, + language=None, + output_format="pcm", + ), **kwargs, ) - params = params or GradiumTTSService.InputParams() - # Store service configuration self._api_key = api_key self._url = url self._json_config = json_config - self._settings = GradiumTTSSettings( - model=model, - voice=voice_id, - language=None, - output_format="pcm", - ) # State tracking self._receive_task = None diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index 4f6a62e24..c522c6c6f 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -145,25 +145,27 @@ class GrokRealtimeLLMService(LLMService): start_audio_paused: Whether to start with audio input paused. Defaults to False. **kwargs: Additional arguments passed to parent LLMService. 
""" - super().__init__(base_url=base_url, **kwargs) + super().__init__( + base_url=base_url, + settings=GrokRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) self.api_key = api_key self.base_url = base_url - self._settings = GrokRealtimeLLMSettings( - model=None, - temperature=None, - max_tokens=None, - top_p=None, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - session_properties=session_properties or events.SessionProperties(), - ) - self._audio_input_paused = start_audio_paused self._websocket = None self._receive_task = None diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index cc073f8c7..7e4d40dba 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -99,27 +99,24 @@ class GroqTTSService(TTSService): if sample_rate != self.GROQ_SAMPLE_RATE: logger.warning(f"Groq TTS only supports {self.GROQ_SAMPLE_RATE}Hz sample rate. 
") + params = params or GroqTTSService.InputParams() + super().__init__( pause_frame_processing=True, sample_rate=sample_rate, + settings=GroqTTSSettings( + model=model_name, + voice=voice_id, + language=str(params.language) if params.language else "en", + output_format=output_format, + speed=params.speed, + groq_sample_rate=sample_rate, + ), **kwargs, ) - params = params or GroqTTSService.InputParams() - self._api_key = api_key self._output_format = output_format - self._params = params - - self._settings = GroqTTSSettings( - model=model_name, - voice=voice_id, - language=str(params.language) if params.language else "en", - output_format=output_format, - speed=params.speed, - groq_sample_rate=sample_rate, - ) - self._sync_model_name_to_metrics() self._client = AsyncGroq(api_key=self._api_key) diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index a08a80aa2..84f4116f3 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -89,24 +89,23 @@ class HathoraSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the parent class. """ + params = params or HathoraSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=HathoraSTTSettings( + model=model, + language=params.language, + config=params.config, + ), **kwargs, ) + self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") self._base_url = base_url - params = params or HathoraSTTService.InputParams() - - self._settings = HathoraSTTSettings( - model=model, - language=params.language, - config=params.config, - ) - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 1e7662aab..e16aa3b08 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -107,24 +107,22 @@ class HathoraTTSService(TTSService): params: Configuration parameters. **kwargs: Additional arguments passed to the parent class. """ + params = params or HathoraTTSService.InputParams() + super().__init__( sample_rate=sample_rate, + settings=HathoraTTSSettings( + model=model, + voice=voice_id, + speed=params.speed, + config=params.config, + ), **kwargs, ) self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") self._base_url = base_url - params = params or HathoraTTSService.InputParams() - - self._settings = HathoraTTSSettings( - model=model, - voice=voice_id, - speed=params.speed, - config=params.config, - ) - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 3fb43ff88..2a075ab36 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -121,11 +121,21 @@ class HumeTTSService(TTSService): f"Hume TTS streams at {HUME_SAMPLE_RATE} Hz; configured sample_rate={sample_rate}" ) + params = params or HumeTTSService.InputParams() + super().__init__( sample_rate=sample_rate, push_text_frames=False, push_stop_frames=True, supports_word_timestamps=True, + settings=HumeTTSSettings( + model=None, + voice=voice_id, + language=None, # Not applicable here + description=params.description, + speed=params.speed, + trailing_silence=params.trailing_silence, + ), **kwargs, ) @@ -135,15 +145,6 @@ class HumeTTSService(TTSService): self._client = AsyncHumeClient(api_key=api_key, httpx_client=self._http_client) - params = params or HumeTTSService.InputParams() - self._settings = HumeTTSSettings( - model=None, - voice=voice_id, - description=params.description, - speed=params.speed, - trailing_silence=params.trailing_silence, - ) - self._audio_bytes = b"" # Track cumulative time for word timestamps across utterances diff --git a/src/pipecat/services/image_service.py b/src/pipecat/services/image_service.py index 58ab58fa4..f99909444 100644 --- a/src/pipecat/services/image_service.py +++ b/src/pipecat/services/image_service.py @@ -11,11 +11,12 @@ text prompts into images. """ from abc import abstractmethod -from typing import AsyncGenerator +from typing import AsyncGenerator, Optional from pipecat.frames.frames import Frame, TextFrame from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import ImageGenSettings class ImageGenService(AIService): @@ -26,13 +27,20 @@ class ImageGenService(AIService): generation functionality using their specific AI service. 
""" - def __init__(self, **kwargs): + def __init__(self, *, settings: Optional[ImageGenSettings] = None, **kwargs): """Initialize the image generation service. Args: + settings: The runtime-updatable settings for the image generation service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or ImageGenSettings(), + **kwargs, + ) # Renders the image. Returns an Image object. @abstractmethod diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 22bdf22ff..2fb86b4a6 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -150,16 +150,28 @@ class InworldHttpTTSService(TTSService): params: Input parameters for Inworld TTS configuration. **kwargs: Additional arguments passed to the parent class. """ + params = params or InworldHttpTTSService.InputParams() + super().__init__( push_text_frames=False, push_stop_frames=True, supports_word_timestamps=True, sample_rate=sample_rate, + settings=InworldTTSSettings( + model=model, + voice=voice_id, + language=None, + audio_encoding=encoding, + audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=None, # Not applicable for HTTP TTS + apply_text_normalization=None, # Not applicable for HTTP TTS + ), **kwargs, ) - params = params or InworldHttpTTSService.InputParams() - self._api_key = api_key self._session = aiohttp_session self._streaming = streaming @@ -170,23 +182,8 @@ class InworldHttpTTSService(TTSService): else: self._base_url = "https://api.inworld.ai/tts/v1/voice" - self._settings = InworldTTSSettings( - model=model, - voice=voice_id, - language=None, - audio_encoding=encoding, - audio_sample_rate=0, - speaking_rate=params.speaking_rate, - 
temperature=params.temperature, - timestamp_transport_strategy=params.timestamp_transport_strategy, - auto_mode=None, # Not applicable for HTTP TTS - apply_text_normalization=None, # Not applicable for HTTP TTS - ) - self._cumulative_time = 0.0 - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -530,6 +527,8 @@ class InworldTTSService(AudioContextTTSService): append_trailing_space: Whether to append a trailing space to text before sending to TTS. **kwargs: Additional arguments passed to the parent class. """ + params = params or InworldTTSService.InputParams() + super().__init__( push_text_frames=False, push_stop_frames=True, @@ -538,25 +537,23 @@ class InworldTTSService(AudioContextTTSService): sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, append_trailing_space=append_trailing_space, + settings=InworldTTSSettings( + model=model, + voice=voice_id, + language=None, + audio_encoding=encoding, + audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + apply_text_normalization=params.apply_text_normalization, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=params.auto_mode if params.auto_mode is not None else aggregate_sentences, + ), **kwargs, ) - params = params or InworldTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings = InworldTTSSettings( - model=model, - voice=voice_id, - language=None, - audio_encoding=encoding, - audio_sample_rate=0, - speaking_rate=params.speaking_rate, - temperature=params.temperature, - apply_text_normalization=params.apply_text_normalization, - timestamp_transport_strategy=params.timestamp_transport_strategy, - auto_mode=params.auto_mode if params.auto_mode is not None else aggregate_sentences, - ) self._timestamp_type = "WORD" self._buffer_settings = { @@ -575,8 +572,6 @@ class InworldTTSService(AudioContextTTSService): # Track 
the end time of the last word in the current generation self._generation_end_time = 0.0 - self._sync_model_name_to_metrics() - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 519c565ba..4b35fa46d 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -137,19 +137,20 @@ class KokoroTTSService(TTSService): **kwargs: Additional arguments passed to parent `TTSService`. """ - super().__init__(**kwargs) - params = params or KokoroTTSService.InputParams() - self._lang_code = language_to_kokoro_language(params.language) - - self._settings = KokoroTTSSettings( - model=None, - voice=voice_id, - language=language_to_kokoro_language(params.language), - lang_code=language_to_kokoro_language(params.language), + super().__init__( + settings=KokoroTTSSettings( + model=None, + voice=voice_id, + language=language_to_kokoro_language(params.language), + lang_code=language_to_kokoro_language(params.language), + ), + **kwargs, ) + self._lang_code = language_to_kokoro_language(params.language) + model = Path(model_path) if model_path else KOKORO_CACHE_DIR / "kokoro-v1.0.onnx" voices = Path(voices_path) if voices_path else KOKORO_CACHE_DIR / "voices-v1.0.bin" diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 1102e85a1..a06423754 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -181,7 +181,11 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): adapter_class: Type[BaseLLMAdapter] = OpenAILLMAdapter def __init__( - self, run_in_parallel: bool = True, function_call_timeout_secs: float = 10.0, **kwargs + self, + run_in_parallel: bool = True, + function_call_timeout_secs: float = 10.0, + settings: Optional[LLMSettings] = None, + **kwargs, ): """Initialize the LLM service. 
@@ -190,10 +194,17 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): Defaults to True. function_call_timeout_secs: Timeout in seconds for deferred function calls. Defaults to 10.0 seconds. + settings: The runtime-updatable settings for the LLM service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or LLMSettings(), + **kwargs, + ) self._run_in_parallel = run_in_parallel self._function_call_timeout_secs = function_call_timeout_secs self._filter_incomplete_user_turns: bool = False @@ -204,7 +215,6 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): self._sequential_runner_task: Optional[asyncio.Task] = None self._skip_tts: Optional[bool] = None self._summary_task: Optional[asyncio.Task] = None - self._settings = LLMSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._register_event_handler("on_function_calls_started") self._register_event_handler("on_completion_timeout") diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index b7ebb19ea..a2c500ca2 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -118,17 +118,16 @@ class LmntTTSService(InterruptibleTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, + settings=LmntTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(language), + format="raw", + ), **kwargs, ) self._api_key = api_key - self._settings = LmntTTSSettings( - model=model, - voice=voice_id, - language=self.language_to_service_language(language), - format="raw", - ) - self._sync_model_name_to_metrics() self._receive_task = None self._context_id: Optional[str] = None diff --git a/src/pipecat/services/minimax/tts.py 
b/src/pipecat/services/minimax/tts.py index 388b7ee9e..116d24a34 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -227,35 +227,35 @@ class MiniMaxHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or MiniMaxHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=MiniMaxTTSSettings( + model=model, + voice=voice_id, + language=None, + stream=True, + speed=params.speed, + volume=params.volume, + pitch=params.pitch, + language_boost=None, + emotion=None, + text_normalization=None, + latex_read=None, + audio_bitrate=128000, + audio_format="pcm", + audio_channel=1, + audio_sample_rate=0, + ), + **kwargs, + ) + self._api_key = api_key self._group_id = group_id self._base_url = f"{base_url}?GroupId={group_id}" self._session = aiohttp_session - # Create voice settings - self._settings = MiniMaxTTSSettings( - model=model, - voice=voice_id, - language=None, - stream=True, - speed=params.speed, - volume=params.volume, - pitch=params.pitch, - language_boost=None, - emotion=None, - text_normalization=None, - latex_read=None, - audio_bitrate=128000, - audio_format="pcm", - audio_channel=1, - audio_sample_rate=0, - ) - self._sync_model_name_to_metrics() - # Add language boost if provided if params.language: service_lang = self.language_to_service_language(params.language) diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py index 16be15ac5..53b98b77a 100644 --- a/src/pipecat/services/moondream/vision.py +++ b/src/pipecat/services/moondream/vision.py @@ -11,6 +11,7 @@ for image analysis and description generation. 
""" import asyncio +from dataclasses import dataclass from typing import AsyncGenerator, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( VisionFullResponseStartFrame, VisionTextFrame, ) +from pipecat.services.settings import VisionSettings from pipecat.services.vision_service import VisionService try: @@ -60,6 +62,15 @@ def detect_device(): return torch.device("cpu"), torch.float32 +@dataclass +class MoondreamSettings(VisionSettings): + """Settings for the Moondream vision service. + + Parameters: + model: Moondream model identifier. + """ + + class MoondreamService(VisionService): """Moondream vision-language model service. @@ -79,10 +90,7 @@ class MoondreamService(VisionService): use_cpu: Whether to force CPU usage instead of hardware acceleration. **kwargs: Additional arguments passed to the parent VisionService. """ - super().__init__(**kwargs) - - self._settings.model = model - self._sync_model_name_to_metrics() + super().__init__(settings=MoondreamSettings(model=model), **kwargs) if not use_cpu: device, dtype = detect_device() diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index e076958c4..81b366a8b 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -134,26 +134,26 @@ class NeuphonicTTSService(InterruptibleTTSService): aggregate_sentences: Whether to aggregate sentences within the TTSService. **kwargs: Additional arguments passed to parent InterruptibleTTSService. 
""" + params = params or NeuphonicTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_stop_frames=True, stop_frame_timeout_s=2.0, sample_rate=sample_rate, + settings=NeuphonicTTSSettings( + model=None, + language=self.language_to_service_language(params.language), + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + voice=voice_id, + ), **kwargs, ) - params = params or NeuphonicTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings = NeuphonicTTSSettings( - model=None, - language=self.language_to_service_language(params.language), - speed=params.speed, - encoding=encoding, - sampling_rate=sample_rate, - voice=voice_id, - ) self._cumulative_time = 0 @@ -443,21 +443,24 @@ class NeuphonicHttpTTSService(TTSService): params: Additional input parameters for TTS configuration. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or NeuphonicHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=NeuphonicTTSSettings( + model=None, + voice=voice_id, + language=self.language_to_service_language(params.language) or "en", + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + ), + **kwargs, + ) + self._api_key = api_key self._session = aiohttp_session self._base_url = url.rstrip("/") - self._settings = NeuphonicTTSSettings( - model=None, - voice=voice_id, - language=self.language_to_service_language(params.language) or "en", - speed=params.speed, - encoding=encoding, - sampling_rate=sample_rate, - ) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 3bbe04f51..950515096 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -164,10 +164,18 @@ class NvidiaSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or NvidiaSTTService.InputParams() + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=NvidiaSTTSettings( + model=model_function_map.get("model_name"), + language=params.language, + ), + **kwargs, + ) + self._server = server self._api_key = api_key self._use_ssl = use_ssl @@ -180,12 +188,6 @@ class NvidiaSTTService(STTService): self._custom_configuration = "" self._function_id = model_function_map.get("function_id") - self._settings = NvidiaSTTSettings( - model=model_function_map.get("model_name"), - language=params.language, - ) - self._sync_model_name_to_metrics() - self._asr_service = None self._queue = None self._config = None @@ -463,10 +465,24 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): Override for your deployment. 
See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or NvidiaSegmentedSTTService.InputParams() + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=NvidiaSegmentedSTTSettings( + model=model_function_map.get("model_name"), + language=self.language_to_service_language(params.language or Language.EN_US) + or "en-US", + profanity_filter=params.profanity_filter, + automatic_punctuation=params.automatic_punctuation, + verbatim_transcripts=params.verbatim_transcripts, + boosted_lm_words=params.boosted_lm_words, + boosted_lm_score=params.boosted_lm_score, + ), + **kwargs, + ) + # Initialize NVIDIA Riva settings self._api_key = api_key self._server = server @@ -484,17 +500,6 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = None self._asr_service = None - self._settings = NvidiaSegmentedSTTSettings( - model=model_function_map.get("model_name"), - language=self.language_to_service_language(params.language or Language.EN_US) - or "en-US", - profanity_filter=params.profanity_filter, - automatic_punctuation=params.automatic_punctuation, - verbatim_transcripts=params.verbatim_transcripts, - boosted_lm_words=params.boosted_lm_words, - boosted_lm_score=params.boosted_lm_score, - ) - self._sync_model_name_to_metrics() def language_to_service_language(self, language: Language) -> Optional[str]: """Convert pipecat Language enum to NVIDIA Riva's language code. diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index c6a5f371e..6785e9631 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -103,21 +103,23 @@ class NvidiaTTSService(TTSService): use_ssl: Whether to use SSL for the NVIDIA Riva server. Defaults to True. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or NvidiaTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=NvidiaTTSSettings( + model=model_function_map.get("model_name"), + voice=voice_id, + language=params.language, + quality=params.quality, + ), + **kwargs, + ) + self._server = server self._api_key = api_key self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl - self._settings = NvidiaTTSSettings( - model=model_function_map.get("model_name"), - voice=voice_id, - language=params.language, - quality=params.quality, - ) - self._sync_model_name_to_metrics() self._service = None self._config = None diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 9ba0583a1..40a2672f8 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -133,28 +133,28 @@ class BaseOpenAILLMService(LLMService): retry_on_timeout: Whether to retry the request once if it times out. **kwargs: Additional arguments passed to the parent LLMService. 
""" - super().__init__(**kwargs) - params = params or BaseOpenAILLMService.InputParams() - self._settings = OpenAILLMSettings( - model=model, - frequency_penalty=params.frequency_penalty, - presence_penalty=params.presence_penalty, - seed=params.seed, - temperature=params.temperature, - top_p=params.top_p, - top_k=None, - max_tokens=params.max_tokens, - max_completion_tokens=params.max_completion_tokens, - service_tier=params.service_tier, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - extra=params.extra if isinstance(params.extra, dict) else {}, + super().__init__( + settings=OpenAILLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + presence_penalty=params.presence_penalty, + seed=params.seed, + temperature=params.temperature, + top_p=params.top_p, + top_k=None, + max_tokens=params.max_tokens, + max_completion_tokens=params.max_completion_tokens, + service_tier=params.service_tier, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, ) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._sync_model_name_to_metrics() self._full_model_name: str = "" self._client = self.create_client( api_key=api_key, diff --git a/src/pipecat/services/openai/image.py b/src/pipecat/services/openai/image.py index 36efc5987..f35a5ded8 100644 --- a/src/pipecat/services/openai/image.py +++ b/src/pipecat/services/openai/image.py @@ -11,6 +11,7 @@ for creating images from text prompts. 
""" import io +from dataclasses import dataclass from typing import AsyncGenerator, Literal, Optional import aiohttp @@ -24,6 +25,16 @@ from pipecat.frames.frames import ( URLImageRawFrame, ) from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings + + +@dataclass +class OpenAIImageGenSettings(ImageGenSettings): + """Settings for the OpenAI image generation service. + + Parameters: + model: DALL-E model identifier. + """ class OpenAIImageGenService(ImageGenService): @@ -52,9 +63,7 @@ class OpenAIImageGenService(ImageGenService): image_size: Target size for generated images. model: DALL-E model to use for generation. Defaults to "dall-e-3". """ - super().__init__() - self._settings.model = model - self._sync_model_name_to_metrics() + super().__init__(settings=OpenAIImageGenSettings(model=model)) self._image_size = image_size self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) self._aiohttp_session = aiohttp_session diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index a68304fdc..6665a9a75 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -171,25 +171,26 @@ class OpenAIRealtimeLLMService(LLMService): # Build WebSocket URL with model query parameter # Source: https://platform.openai.com/docs/guides/realtime-websocket full_url = f"{base_url}?model={model}" - super().__init__(base_url=full_url, **kwargs) + super().__init__( + base_url=full_url, + settings=OpenAIRealtimeLLMSettings( + model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) self.api_key = api_key self.base_url = full_url - - self._settings = OpenAIRealtimeLLMSettings( - 
model=model, - temperature=None, - max_tokens=None, - top_p=None, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - session_properties=session_properties or events.SessionProperties(), - ) - self._sync_model_name_to_metrics() self._audio_input_paused = start_audio_paused self._video_input_paused = start_video_paused self._video_frame_detail = video_frame_detail diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 8b690d015..9a52be114 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -221,6 +221,11 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): super().__init__( ttfs_p99_latency=ttfs_p99_latency, + settings=OpenAIRealtimeSTTSettings( + model=model, + language=language, + prompt=prompt, + ), **kwargs, ) @@ -232,13 +237,6 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self._noise_reduction = noise_reduction self._should_interrupt = should_interrupt - self._settings = OpenAIRealtimeSTTSettings( - model=model, - language=language, - prompt=prompt, - ) - self._sync_model_name_to_metrics() - self._receive_task = None self._session_ready = False self._resampler = create_stream_resampler() diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index 2693bcc27..f95d79134 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -132,10 +132,6 @@ class OpenAITTSService(TTSService): f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." 
) - super().__init__(sample_rate=sample_rate, **kwargs) - - self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) - if instructions or speed: import warnings @@ -147,13 +143,18 @@ class OpenAITTSService(TTSService): stacklevel=2, ) - self._settings = OpenAITTSSettings( - model=model, - voice=voice, - instructions=params.instructions if params else instructions, - speed=params.speed if params else speed, + super().__init__( + sample_rate=sample_rate, + settings=OpenAITTSSettings( + model=model, + voice=voice, + instructions=params.instructions if params else instructions, + speed=params.speed if params else speed, + ), + **kwargs, ) - self._sync_model_name_to_metrics() + + self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 6ffccfbef..983cd10df 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -156,25 +156,26 @@ class OpenAIRealtimeBetaLLMService(LLMService): ) full_url = f"{base_url}?model={model}" - super().__init__(base_url=full_url, **kwargs) + super().__init__( + base_url=full_url, + settings=OpenAIRealtimeBetaLLMSettings( + model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) self.api_key = api_key self.base_url = full_url - - self._settings = OpenAIRealtimeBetaLLMSettings( - model=model, - temperature=None, - max_tokens=None, - top_p=None, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - 
session_properties=session_properties or events.SessionProperties(), - ) - self._sync_model_name_to_metrics() self._audio_input_paused = start_audio_paused self._send_transcription_frames = send_transcription_frames self._websocket = None diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index e6a2c6943..c4831b839 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -69,9 +69,10 @@ class PiperTTSService(TTSService): use_cuda: Use CUDA for GPU-accelerated inference. **kwargs: Additional arguments passed to the parent `TTSService`. """ - super().__init__(**kwargs) - - self._settings = PiperTTSSettings(model=None, voice=voice_id, language=None) + super().__init__( + settings=PiperTTSSettings(model=None, voice=voice_id, language=None), + **kwargs, + ) download_dir = download_dir or Path.cwd() @@ -199,7 +200,10 @@ class PiperHttpTTSService(TTSService): voice_id: Piper voice model identifier (e.g. `en_US-ryan-high`). **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(**kwargs) + super().__init__( + settings=PiperHttpTTSSettings(model=None, voice=voice_id, language=None), + **kwargs, + ) if base_url.endswith("/"): logger.warning("Base URL ends with a slash, this is not allowed.") @@ -207,7 +211,6 @@ class PiperHttpTTSService(TTSService): self._base_url = base_url self._session = aiohttp_session - self._settings = PiperHttpTTSSettings(model=None, voice=voice_id, language=None) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index c2ac758a7..1c2953b72 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -92,19 +92,19 @@ class ResembleAITTSService(AudioContextTTSService): sample_rate=sample_rate, reuse_context_id_within_turn=False, supports_word_timestamps=True, + settings=ResembleAITTSSettings( + model=None, + voice=voice_id, + language=None, + precision=precision, + output_format=output_format, + resemble_sample_rate=sample_rate, + ), **kwargs, ) self._api_key = api_key self._url = url - self._settings = ResembleAITTSSettings( - model=None, - voice=voice_id, - language=None, - precision=precision, - output_format=output_format, - resemble_sample_rate=sample_rate, - ) self._websocket = None self._request_id_counter = 0 diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 484c99857..059db8178 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -202,6 +202,8 @@ class RimeTTSService(AudioContextTTSService): **kwargs: Additional arguments passed to parent class. 
""" # Initialize with parent class settings for proper frame handling + params = params or RimeTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, push_text_frames=False, @@ -210,6 +212,28 @@ class RimeTTSService(AudioContextTTSService): supports_word_timestamps=True, append_trailing_space=True, sample_rate=sample_rate, + settings=RimeTTSSettings( + model=model, + voice=voice_id, + audioFormat="pcm", + samplingRate=0, # updated in start() + language=self.language_to_service_language(params.language) + if params.language + else None, + segment=params.segment, + inlineSpeedAlpha=None, # Not applicable here + # Arcana params + repetition_penalty=params.repetition_penalty, + temperature=params.temperature, + top_p=params.top_p, + # Mistv2 params + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + noTextNormalization=params.no_text_normalization, + saveOovs=params.save_oovs, + ), **kwargs, ) @@ -221,34 +245,9 @@ class RimeTTSService(AudioContextTTSService): # and insert these tags for the purpose of the TTS service alone. 
self._text_aggregator = SkipTagsAggregator([("spell(", ")")]) - params = params or RimeTTSService.InputParams() - # Store service configuration self._api_key = api_key self._url = url - self._settings = RimeTTSSettings( - model=model, - voice=voice_id, - audioFormat="pcm", - samplingRate=0, # updated in start() - language=self.language_to_service_language(params.language) - if params.language - else None, - segment=params.segment, - inlineSpeedAlpha=None, # Not applicable here - # Arcana params - repetition_penalty=params.repetition_penalty, - temperature=params.temperature, - top_p=params.top_p, - # Mistv2 params - speedAlpha=params.speed_alpha, - reduceLatency=params.reduce_latency, - pauseBetweenBrackets=params.pause_between_brackets, - phonemizeBetweenBrackets=params.phonemize_between_brackets, - noTextNormalization=params.no_text_normalization, - saveOovs=params.save_oovs, - ) - self._sync_model_name_to_metrics() # State tracking self._receive_task = None @@ -657,34 +656,36 @@ class RimeHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or RimeHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=RimeTTSSettings( + model=model, + language=self.language_to_service_language(params.language) + if params.language + else "eng", + audioFormat="pcm", + samplingRate=0, + segment=None, + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + noTextNormalization=None, + saveOovs=None, + inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else None, + repetition_penalty=None, + temperature=None, + top_p=None, + voice=voice_id, + ), + **kwargs, + ) + self._api_key = api_key self._session = aiohttp_session self._base_url = "https://users.rime.ai/v1/rime-tts" - self._settings = RimeTTSSettings( - model=model, - language=self.language_to_service_language(params.language) - if params.language - else "eng", - audioFormat="pcm", - samplingRate=0, - segment=None, - speedAlpha=params.speed_alpha, - reduceLatency=params.reduce_latency, - pauseBetweenBrackets=params.pause_between_brackets, - phonemizeBetweenBrackets=params.phonemize_between_brackets, - noTextNormalization=None, - saveOovs=None, - inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else None, - repetition_penalty=None, - temperature=None, - top_p=None, - voice=voice_id, - ) - self._sync_model_name_to_metrics() def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -841,31 +842,30 @@ class RimeNonJsonTTSService(InterruptibleTTSService): aggregate_sentences: Whether to aggregate sentences within the TTSService. **kwargs: Additional arguments passed to parent class. 
""" + params = params or RimeNonJsonTTSService.InputParams() super().__init__( sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, push_stop_frames=True, pause_frame_processing=True, append_trailing_space=True, + settings=RimeNonJsonTTSSettings( + voice=voice_id, + model=model, + audioFormat=audio_format, + samplingRate=sample_rate, + language=self.language_to_service_language(params.language) + if params.language + else None, + segment=params.segment, + repetition_penalty=params.repetition_penalty, + temperature=params.temperature, + top_p=params.top_p, + ), **kwargs, ) - params = params or RimeNonJsonTTSService.InputParams() self._api_key = api_key self._url = url - self._settings = RimeNonJsonTTSSettings( - voice=voice_id, - model=model, - audioFormat=audio_format, - samplingRate=sample_rate, - language=self.language_to_service_language(params.language) - if params.language - else None, - segment=params.segment, - repetition_penalty=params.repetition_penalty, - temperature=params.temperature, - top_p=params.top_p, - ) - self._sync_model_name_to_metrics() # Add any extra parameters for future compatibility if params.extra: self._settings.extra.update(params.extra) diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 02d6e250f..379473c6f 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -240,11 +240,22 @@ class SarvamSTTService(STTService): f"Model '{model}' does not support language parameter (auto-detects language)." 
) + # Resolve mode default from model config + mode = params.mode if params.mode is not None else self._config.default_mode + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=keepalive_timeout, keepalive_interval=keepalive_interval, + settings=SarvamSTTSettings( + model=model, + language=params.language, + prompt=params.prompt, + mode=mode, + vad_signals=params.vad_signals, + high_vad_sensitivity=params.high_vad_sensitivity, + ), **kwargs, ) @@ -268,19 +279,6 @@ class SarvamSTTService(STTService): self._socket_client = None self._receive_task = None - # Resolve mode default from model config - mode = params.mode if params.mode is not None else self._config.default_mode - - self._settings = SarvamSTTSettings( - model=model, - language=params.language, - prompt=params.prompt, - mode=mode, - vad_signals=params.vad_signals, - high_vad_sensitivity=params.high_vad_sensitivity, - ) - self._sync_model_name_to_metrics() - if params.vad_signals: self._register_event_handler("on_speech_started") self._register_event_handler("on_speech_stopped") diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index ade547798..7b63828a1 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -466,12 +466,6 @@ class SarvamHttpTTSService(TTSService): if voice_id is None: voice_id = self._config.default_speaker - super().__init__(sample_rate=sample_rate, **kwargs) - - self._api_key = api_key - self._base_url = base_url - self._session = aiohttp_session - # Validate and clamp pace to model's valid range pace = params.pace pace_min, pace_max = self._config.pace_range @@ -479,22 +473,32 @@ class SarvamHttpTTSService(TTSService): logger.warning(f"Pace {pace} is outside model range ({pace_min}-{pace_max}). 
Clamping.") pace = max(pace_min, min(pace_max, pace)) - # Build base settings - self._settings = SarvamHttpTTSSettings( - language=( - self.language_to_service_language(params.language) if params.language else "en-IN" + super().__init__( + sample_rate=sample_rate, + settings=SarvamHttpTTSSettings( + language=( + self.language_to_service_language(params.language) + if params.language + else "en-IN" + ), + enable_preprocessing=( + True + if self._config.preprocessing_always_enabled + else params.enable_preprocessing + ), + pace=pace, + pitch=None, + loudness=None, + temperature=None, + model=model, + voice=voice_id, ), - enable_preprocessing=( - True if self._config.preprocessing_always_enabled else params.enable_preprocessing - ), - pace=pace, - pitch=None, - loudness=None, - temperature=None, - model=model, - voice=voice_id, + **kwargs, ) - self._sync_model_name_to_metrics() + + self._api_key = api_key + self._base_url = base_url + self._session = aiohttp_session # Add parameters based on model support if self._config.supports_pitch: @@ -818,21 +822,8 @@ class SarvamTTSService(InterruptibleTTSService): if voice_id is None: voice_id = self._config.default_speaker - # Initialize parent class first - super().__init__( - aggregate_sentences=aggregate_sentences, - push_text_frames=True, - pause_frame_processing=True, - push_stop_frames=True, - sample_rate=sample_rate, - **kwargs, - ) params = params or SarvamTTSService.InputParams() - # WebSocket endpoint URL with model query parameter - self._websocket_url = f"{url}?model={model}" - self._api_key = api_key - # Validate and clamp pace to model's valid range pace = params.pace pace_min, pace_max = self._config.pace_range @@ -840,27 +831,42 @@ class SarvamTTSService(InterruptibleTTSService): logger.warning(f"Pace {pace} is outside model range ({pace_min}-{pace_max}). 
Clamping.") pace = max(pace_min, min(pace_max, pace)) - # Build base settings - self._settings = SarvamTTSSettings( - language=( - self.language_to_service_language(params.language) if params.language else "en-IN" + # Initialize parent class first + super().__init__( + aggregate_sentences=aggregate_sentences, + push_text_frames=True, + pause_frame_processing=True, + push_stop_frames=True, + sample_rate=sample_rate, + settings=SarvamTTSSettings( + language=( + self.language_to_service_language(params.language) + if params.language + else "en-IN" + ), + speech_sample_rate=str(sample_rate), + enable_preprocessing=( + True + if self._config.preprocessing_always_enabled + else params.enable_preprocessing + ), + min_buffer_size=params.min_buffer_size, + max_chunk_length=params.max_chunk_length, + output_audio_codec=params.output_audio_codec, + output_audio_bitrate=params.output_audio_bitrate, + pace=pace, + pitch=None, + loudness=None, + temperature=None, + model=model, + voice=voice_id, ), - speech_sample_rate=str(sample_rate), - enable_preprocessing=( - True if self._config.preprocessing_always_enabled else params.enable_preprocessing - ), - min_buffer_size=params.min_buffer_size, - max_chunk_length=params.max_chunk_length, - output_audio_codec=params.output_audio_codec, - output_audio_bitrate=params.output_audio_bitrate, - pace=pace, - pitch=None, - loudness=None, - temperature=None, - model=model, - voice=voice_id, + **kwargs, ) - self._sync_model_name_to_metrics() + + # WebSocket endpoint URL with model query parameter + self._websocket_url = f"{url}?model={model}" + self._api_key = api_key # Add parameters based on model support if self._config.supports_pitch: diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 641cc23f5..5d215273f 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -319,6 +319,28 @@ class ServiceSettings: # 
--------------------------------------------------------------------------- +@dataclass +class ImageGenSettings(ServiceSettings): + """Runtime-updatable settings for image generation services. + + Used in both store and delta mode — see ``ServiceSettings``. + + Parameters: + model: Image generation model identifier. + """ + + +@dataclass +class VisionSettings(ServiceSettings): + """Runtime-updatable settings for vision services. + + Used in both store and delta mode — see ``ServiceSettings``. + + Parameters: + model: Vision model identifier. + """ + + @dataclass class LLMSettings(ServiceSettings): """Runtime-updatable settings for LLM services. diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 3160d19a6..32cbee1f4 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -202,33 +202,32 @@ class SonioxSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the STTService. 
""" + params = params or SonioxInputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=1, keepalive_interval=5, + settings=SonioxSTTSettings( + model=params.model, + language=None, + audio_format=params.audio_format, + num_channels=params.num_channels, + language_hints=params.language_hints, + language_hints_strict=params.language_hints_strict, + context=params.context, + enable_speaker_diarization=params.enable_speaker_diarization, + enable_language_identification=params.enable_language_identification, + client_reference_id=params.client_reference_id, + ), **kwargs, ) - params = params or SonioxInputParams() self._api_key = api_key self._url = url self._vad_force_turn_endpoint = vad_force_turn_endpoint - self._settings = SonioxSTTSettings( - model=params.model, - language=None, - audio_format=params.audio_format, - num_channels=params.num_channels, - language_hints=params.language_hints, - language_hints_strict=params.language_hints_strict, - context=params.context, - enable_speaker_diarization=params.enable_speaker_diarization, - enable_language_identification=params.enable_language_identification, - client_reference_id=params.client_reference_id, - ) - self._sync_model_name_to_metrics() - self._final_transcription_buffer = [] self._last_tokens_received: Optional[float] = None diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 61bf8b69f..ac18a36e3 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -398,8 +398,6 @@ class SpeechmaticsSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. 
""" - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - # Service parameters self._api_key: str = api_key or os.getenv("SPEECHMATICS_API_KEY") self._base_url: str = ( @@ -428,8 +426,8 @@ class SpeechmaticsSTTService(STTService): speaker_passive_format = params.speaker_passive_format or speaker_active_format # Settings — seeded from InputParams - self._settings = SpeechmaticsSTTSettings( - model=None, + settings = SpeechmaticsSTTSettings( + model=None, # Will be resolved from operating_point after config is built language=params.language, domain=params.domain, turn_detection_mode=params.turn_detection_mode, @@ -455,9 +453,17 @@ class SpeechmaticsSTTService(STTService): extra_params=params.extra_params, ) - # Build SDK config from settings + # Build SDK config from settings, then resolve model from operating_point self._client: VoiceAgentClient | None = None - self._config: VoiceAgentConfig = self._build_config() + self._config: VoiceAgentConfig = self._build_config(settings) + settings.model = self._config.operating_point.value + + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=settings, + **kwargs, + ) # Outbound frame queue self._outbound_frames: asyncio.Queue[Frame] = asyncio.Queue() @@ -468,10 +474,6 @@ class SpeechmaticsSTTService(STTService): EndOfUtteranceMode.EXTERNAL, ] - # Model + metrics (operating_point comes from the SDK config/preset) - self._settings.model = self._config.operating_point.value - self._sync_model_name_to_metrics() - # Message queue self._stt_msg_queue: asyncio.Queue[dict[str, Any]] = asyncio.Queue() self._stt_msg_task: asyncio.Task | None = None @@ -524,7 +526,7 @@ class SpeechmaticsSTTService(STTService): logger.debug(f"{self} settings update requires reconnect: {changed.keys()}") # Connection-level fields changed — rebuild the SDK config # from the now-updated self._settings, then reconnect. 
- self._config = self._build_config() + self._config = self._build_config(self._settings) await self._disconnect() await self._connect() elif changed.keys() & SpeechmaticsSTTSettings.HOT_FIELDS: @@ -661,13 +663,17 @@ class SpeechmaticsSTTService(STTService): # CONFIGURATION # ============================================================================ - def _build_config(self) -> VoiceAgentConfig: - """Build a ``VoiceAgentConfig`` from the current ``self._settings``. + def _build_config(self, settings: SpeechmaticsSTTSettings) -> VoiceAgentConfig: + """Build a ``VoiceAgentConfig`` from the given settings. - Used both at init time and before reconnecting so the connection - always reflects the latest settings. + Used both at init time (with explicit settings, before + ``super().__init__`` has run) and before reconnecting so the + connection always reflects the latest settings. + + Args: + settings: Settings to build from. """ - s = self._settings + s = settings # Preset from turn detection mode config = VoiceAgentConfigPreset.load(s.turn_detection_mode.value) diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 32fb0c2b3..1ddb895aa 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -95,7 +95,18 @@ class SpeechmaticsTTSService(TTSService): f"Speechmatics TTS only supports {self.SPEECHMATICS_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." 
) - super().__init__(sample_rate=sample_rate, **kwargs) + params = params or SpeechmaticsTTSService.InputParams() + + super().__init__( + sample_rate=sample_rate, + settings=SpeechmaticsTTSSettings( + model=None, + voice=voice_id, + language=None, + max_retries=params.max_retries, + ), + **kwargs, + ) # Service parameters self._api_key: str = api_key @@ -106,14 +117,6 @@ class SpeechmaticsTTSService(TTSService): if not self._api_key: raise ValueError("Missing Speechmatics API key") - params = params or SpeechmaticsTTSService.InputParams() - self._settings = SpeechmaticsTTSSettings( - model=None, - voice=voice_id, - language=None, - max_retries=params.max_retries, - ) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 20e8cacc9..ebf007f6f 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -86,6 +86,7 @@ class STTService(AIService): ttfs_p99_latency: Optional[float] = None, keepalive_timeout: Optional[float] = None, keepalive_interval: float = 5.0, + settings: Optional[STTSettings] = None, **kwargs, ): """Initialize the STT service. @@ -109,14 +110,20 @@ class STTService(AIService): connection alive. None disables keepalive. Useful for services that close idle connections (e.g. behind a ServiceSwitcher). keepalive_interval: Seconds between idle checks when keepalive is enabled. + settings: The runtime-updatable settings for the STT service. **kwargs: Additional arguments passed to the parent AIService. 
""" - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or STTSettings(), + **kwargs, + ) self._audio_passthrough = audio_passthrough self._init_sample_rate = sample_rate self._sample_rate = 0 - self._settings = STTSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._muted: bool = False self._user_id: str = "" self._ttfs_p99_latency = ttfs_p99_latency diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index e739a03d2..e36d4754f 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -147,6 +147,7 @@ class TTSService(AIService): text_filter: Optional[BaseTextFilter] = None, # Audio transport destination of the generated frames. transport_destination: Optional[str] = None, + settings: Optional[TTSSettings] = None, **kwargs, ): """Initialize the TTS service. @@ -183,9 +184,16 @@ class TTSService(AIService): Use `text_filters` instead, which allows multiple filters. transport_destination: Destination for generated audio frames. + settings: The runtime-updatable settings for the TTS service. **kwargs: Additional arguments passed to the parent AIService. 
""" - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or TTSSettings(), + **kwargs, + ) self._aggregate_sentences: bool = aggregate_sentences self._push_text_frames: bool = push_text_frames self._push_stop_frames: bool = push_stop_frames @@ -196,7 +204,6 @@ class TTSService(AIService): self._append_trailing_space: bool = append_trailing_space self._init_sample_rate = sample_rate self._sample_rate = 0 - self._settings = TTSSettings() # Here in case subclass doesn't implement more specific settings (hopefully shouldn't happen) self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() if text_aggregator: import warnings diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 11525258a..d14c3b9ca 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -176,19 +176,21 @@ class UltravoxRealtimeLLMService(LLMService): May only be set with OneShotInputParams. **kwargs: Additional arguments passed to parent LLMService. 
""" - super().__init__(**kwargs) - self._settings = UltravoxRealtimeLLMSettings( - model=None, - temperature=None, - max_tokens=None, - top_p=None, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - output_medium=None, + super().__init__( + settings=UltravoxRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + output_medium=None, + ), + **kwargs, ) self._params = params if one_shot_selected_tools: diff --git a/src/pipecat/services/vision_service.py b/src/pipecat/services/vision_service.py index d12737d84..572f3b423 100644 --- a/src/pipecat/services/vision_service.py +++ b/src/pipecat/services/vision_service.py @@ -12,11 +12,12 @@ visual content. """ from abc import abstractmethod -from typing import AsyncGenerator +from typing import AsyncGenerator, Optional from pipecat.frames.frames import Frame, UserImageRawFrame from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import VisionSettings class VisionService(AIService): @@ -27,13 +28,20 @@ class VisionService(AIService): with the AI service infrastructure for metrics and lifecycle management. """ - def __init__(self, **kwargs): + def __init__(self, *, settings: Optional[VisionSettings] = None, **kwargs): """Initialize the vision service. Args: + settings: The runtime-updatable settings for the vision service. **kwargs: Additional arguments passed to the parent AIService. 
""" - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or VisionSettings(), + **kwargs, + ) self._describe_text = None @abstractmethod diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index 9d2b3ab51..cf3342f4b 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -155,22 +155,23 @@ class BaseWhisperSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. """ - super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) + super().__init__( + ttfs_p99_latency=ttfs_p99_latency, + settings=BaseWhisperSTTSettings( + model=model, + language=self.language_to_service_language(language or Language.EN), + base_url=base_url, + prompt=prompt, + temperature=temperature, + ), + **kwargs, + ) self._client = self._create_client(api_key, base_url) - self._language = self.language_to_service_language(language or Language.EN) + self._language = self._settings.language self._prompt = prompt self._temperature = temperature self._include_prob_metrics = include_prob_metrics - self._settings = BaseWhisperSTTSettings( - model=model, - language=self._language, - base_url=base_url, - prompt=self._prompt, - temperature=self._temperature, - ) - self._sync_model_name_to_metrics() - def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index 205838314..d386d6ed2 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -233,21 +233,21 @@ class WhisperSTTService(SegmentedSTTService): language: The default language for transcription. 
**kwargs: Additional arguments passed to SegmentedSTTService. """ - super().__init__(**kwargs) + super().__init__( + settings=WhisperSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + device=device, + compute_type=compute_type, + no_speech_prob=no_speech_prob, + ), + **kwargs, + ) self._device: str = device self._compute_type = compute_type self._no_speech_prob = no_speech_prob self._model: Optional[WhisperModel] = None - self._settings = WhisperSTTSettings( - model=model if isinstance(model, str) else model.value, - language=language, - device=self._device, - compute_type=self._compute_type, - no_speech_prob=self._no_speech_prob, - ) - self._sync_model_name_to_metrics() - self._load() def can_generate_metrics(self) -> bool: @@ -368,20 +368,21 @@ class WhisperSTTServiceMLX(WhisperSTTService): **kwargs: Additional arguments passed to SegmentedSTTService. """ # Skip WhisperSTTService.__init__ and call its parent directly - SegmentedSTTService.__init__(self, **kwargs) + SegmentedSTTService.__init__( + self, + settings=WhisperMLXSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + no_speech_prob=no_speech_prob, + temperature=temperature, + engine="mlx", + ), + **kwargs, + ) self._no_speech_prob = no_speech_prob self._temperature = temperature - self._settings = WhisperMLXSTTSettings( - model=model if isinstance(model, str) else model.value, - language=language, - no_speech_prob=self._no_speech_prob, - temperature=self._temperature, - engine="mlx", - ) - self._sync_model_name_to_metrics() - # No need to call _load() as MLX Whisper loads models on demand @override diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index ab06ffb5a..8817c09b5 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -111,13 +111,15 @@ class XTTSService(TTSService): sample_rate: Audio sample rate. If None, uses default. 
**kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - - self._settings = XTTSTTSSettings( - model=None, - voice=voice_id, - language=self.language_to_service_language(language), - base_url=base_url, + super().__init__( + sample_rate=sample_rate, + settings=XTTSTTSSettings( + model=None, + voice=voice_id, + language=self.language_to_service_language(language), + base_url=base_url, + ), + **kwargs, ) self._studio_speakers: Optional[Dict[str, Any]] = None self._aiohttp_session = aiohttp_session From bca42f7d68b71abdacdaecf008e4a0142d1b3040 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Feb 2026 14:03:53 -0500 Subject: [PATCH 106/189] Fix Hathora 55 series examples, and fix Hathora missing settings field warning --- examples/foundational/55zg-update-settings-hathora-tts.py | 6 +++--- examples/foundational/55zs-update-settings-hathora-stt.py | 4 +++- src/pipecat/services/hathora/tts.py | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/foundational/55zg-update-settings-hathora-tts.py b/examples/foundational/55zg-update-settings-hathora-tts.py index 9f6b6bd0a..80b9bfcce 100644 --- a/examples/foundational/55zg-update-settings-hathora-tts.py +++ b/examples/foundational/55zg-update-settings-hathora-tts.py @@ -54,7 +54,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = HathoraTTSService( api_key=os.getenv("HATHORA_API_KEY"), - model="hathora-ai/polar", + model="hexgrad-kokoro-82m", ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -100,8 +100,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Hathora TTS settings: speed=1.3") - await task.queue_frame(TTSUpdateSettingsFrame(delta=HathoraTTSSettings(speed=1.3))) + logger.info("Updating Hathora TTS settings: speed=1.5") + await 
task.queue_frame(TTSUpdateSettingsFrame(delta=HathoraTTSSettings(speed=1.5))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zs-update-settings-hathora-stt.py b/examples/foundational/55zs-update-settings-hathora-stt.py index db5ed4d2a..7a033490a 100644 --- a/examples/foundational/55zs-update-settings-hathora-stt.py +++ b/examples/foundational/55zs-update-settings-hathora-stt.py @@ -52,7 +52,9 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = HathoraSTTService(api_key=os.getenv("HATHORA_API_KEY"), model="deepgram-nova3") + stt = HathoraSTTService( + api_key=os.getenv("HATHORA_API_KEY"), model="nvidia-parakeet-tdt-0.6b-v3" + ) tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index e16aa3b08..8d6fb32cb 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -114,6 +114,7 @@ class HathoraTTSService(TTSService): settings=HathoraTTSSettings( model=model, voice=voice_id, + language=None, # Not applicable here speed=params.speed, config=params.config, ), From ff0f3dce3209e38867e0eba97323bb328fb327f4 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Feb 2026 15:10:11 -0500 Subject: [PATCH 107/189] A few Groq-related tweaks: - Wire up passing speed setting to Groq, even though only a value of 1.0 is supported today - Update the 55y example to switch voices instead of changing speed - Add a 55zzn example to exercise runtime updates of Groq STT --- .../55y-update-settings-groq-tts.py | 4 +- .../55zzn-update-settings-groq-stt.py | 128 ++++++++++++++++++ src/pipecat/services/groq/tts.py | 3 + 3 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 examples/foundational/55zzn-update-settings-groq-stt.py diff --git
a/examples/foundational/55y-update-settings-groq-tts.py b/examples/foundational/55y-update-settings-groq-tts.py index 86dc1f98a..3531509f2 100644 --- a/examples/foundational/55y-update-settings-groq-tts.py +++ b/examples/foundational/55y-update-settings-groq-tts.py @@ -97,8 +97,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): await task.queue_frames([LLMRunFrame()]) await asyncio.sleep(10) - logger.info("Updating Groq TTS settings: speed=1.5") - await task.queue_frame(TTSUpdateSettingsFrame(delta=GroqTTSSettings(speed=1.5))) + logger.info("Updating Groq TTS settings: voice=troy") + await task.queue_frame(TTSUpdateSettingsFrame(delta=GroqTTSSettings(voice="troy"))) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/55zzn-update-settings-groq-stt.py b/examples/foundational/55zzn-update-settings-groq-stt.py new file mode 100644 index 000000000..b00ecda81 --- /dev/null +++ b/examples/foundational/55zzn-update-settings-groq-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.groq.stt import GroqSTTService +from pipecat.services.openai.llm 
import OpenAILLMService +from pipecat.services.whisper.base_stt import BaseWhisperSTTSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GroqSTTService( + api_key=os.getenv("GROQ_API_KEY"), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Groq STT settings: language="es"') + await task.queue_frame(STTUpdateSettingsFrame(delta=BaseWhisperSTTSettings(language="es"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index cc073f8c7..95ee6faa3 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -152,6 +152,9 @@ class GroqTTSService(TTSService): model=self._settings.model, voice=self._settings.voice, response_format=self._output_format, + # Note: as of 2026-02-25, only a speed of 1.0 is 
supported, but + # here we pass it for completeness and future-proofing + speed=self._settings.speed, input=text, ) From eee2ef7e859c66a60c1d0e976e907f10829d0f6c Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 25 Feb 2026 15:45:16 -0500 Subject: [PATCH 108/189] Add /update-docs skill to claude-plugin --- .claude-plugin/marketplace.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 64aac9338..c628e79e5 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -19,7 +19,8 @@ "./.claude/skills/code-review", "./.claude/skills/docstring", "./.claude/skills/pr-description", - "./.claude/skills/pr-submit" + "./.claude/skills/pr-submit", + "./.claude/skills/update-docs" ] } ] From 781d19150987fa2b8198adf584b90bbae96e1772 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 21:19:39 -0500 Subject: [PATCH 109/189] Remove unnecessary `_model` ivar from `GeminiTTSService`, using `_settings.model` instead --- src/pipecat/services/google/tts.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 80c71b10f..6c71977a0 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -1236,7 +1236,7 @@ class GeminiTTSService(GoogleBaseTTSService): super().__init__( sample_rate=sample_rate, settings=GeminiTTSSettings( - model=None, + model=model, language=self.language_to_service_language(params.language) if params.language else "en-US", @@ -1249,7 +1249,6 @@ class GeminiTTSService(GoogleBaseTTSService): ) self._location = location - self._model = model self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -1327,7 +1326,7 @@ class GeminiTTSService(GoogleBaseTTSService): voice = texttospeech_v1.VoiceSelectionParams( language_code=self._settings.language, - 
model_name=self._model, + model_name=self._settings.model, multi_speaker_voice_config=multi_speaker_voice_config, ) else: @@ -1335,7 +1334,7 @@ class GeminiTTSService(GoogleBaseTTSService): voice = texttospeech_v1.VoiceSelectionParams( language_code=self._settings.language, name=self._settings.voice, - model_name=self._model, + model_name=self._settings.model, ) # Create streaming config From 7ee0400c4c8f81bd17046db48ef2d4d5d6d7bee6 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 21:22:44 -0500 Subject: [PATCH 110/189] Remove unnecessary `_model` ivar from Hathora TTS and STT services, using `_settings.model` instead. --- src/pipecat/services/hathora/stt.py | 4 +--- src/pipecat/services/hathora/tts.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index 84f4116f3..27f1aebfb 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -101,8 +101,6 @@ class HathoraSTTService(SegmentedSTTService): ), **kwargs, ) - - self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") self._base_url = base_url @@ -136,7 +134,7 @@ class HathoraSTTService(SegmentedSTTService): url = f"{self._base_url}" payload = { - "model": self._model, + "model": self._settings.model, } if self._settings.language is not None: diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 8d6fb32cb..3fb9e747b 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -120,7 +120,6 @@ class HathoraTTSService(TTSService): ), **kwargs, ) - self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") self._base_url = base_url @@ -149,7 +148,7 @@ class HathoraTTSService(TTSService): url = f"{self._base_url}" - payload = {"model": self._model, "text": text} + payload = {"model": self._settings.model, "text": text} if self._settings.voice is not None: 
payload["voice"] = self._settings.voice From 3d8e3a4043a14891c91b11fee4a3e1e760c57bed Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 21:49:54 -0500 Subject: [PATCH 111/189] Remove unnecessary `_model` ivar from ElevenLabs STT services, using `_settings.model` instead. --- src/pipecat/services/elevenlabs/stt.py | 32 +++----------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index c3f4300f4..5422fb193 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -279,7 +279,6 @@ class ElevenLabsSTTService(SegmentedSTTService): self._api_key = api_key self._base_url = base_url self._session = aiohttp_session - self._model_id = model def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -300,25 +299,6 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: - """Apply a settings delta. - - Converts language to ElevenLabs format before applying and keeps - ``_model_id`` in sync with the model setting. - - Args: - delta: A :class:`STTSettings` (or ``ElevenLabsSTTSettings``) delta. - - Returns: - Dict mapping changed field names to their previous values. - """ - changed = await super()._update_settings(delta) - - if "model" in changed: - self._model_id = self._settings.model - - return changed - async def _transcribe_audio(self, audio_data: bytes) -> dict: """Upload audio data to ElevenLabs and get transcription result. 
@@ -344,7 +324,7 @@ class ElevenLabsSTTService(SegmentedSTTService): ) # Add required model_id, language_code, and tag_audio_events - data.add_field("model_id", self._model_id) + data.add_field("model_id", self._settings.model) data.add_field("language_code", self._settings.language) data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower()) @@ -522,7 +502,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): self._api_key = api_key self._base_url = base_url - self._model_id = model self._audio_format = "" # initialized in start() self._receive_task = None @@ -540,9 +519,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: """Apply a settings delta and reconnect if anything changed. - Converts language to ElevenLabs format before applying and keeps - ``_model_id`` in sync. - Args: delta: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. @@ -554,11 +530,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): if not changed: return changed - if "model" in changed: - self._model_id = self._settings.model - await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): @@ -704,7 +678,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): logger.debug("Connecting to ElevenLabs Realtime STT") # Build query parameters - params = [f"model_id={self._model_id}"] + params = [f"model_id={self._settings.model}"] if self._settings.language: params.append(f"language_code={self._settings.language}") From a4b6db6fb44148709c22c65039f02ef677ad603b Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 24 Feb 2026 15:06:06 -0500 Subject: [PATCH 112/189] Flatten `LiveOptions` into individual fields on `DeepgramSTTSettings` and `DeepgramSageMakerSTTSettings` for backward-compatible dict-style updates via `STTUpdateSettingsFrame`; during the big service settings refactor, we accidentally got rid of the 
ability to update individual `LiveOptions` fields with a sparse update --- ...-update-settings-deepgram-sagemaker-stt.py | 7 + .../55a-update-settings-deepgram-stt.py | 7 + src/pipecat/services/deepgram/stt.py | 119 ++++++++++------- .../services/deepgram/stt_sagemaker.py | 121 ++++++++++-------- 4 files changed, 158 insertions(+), 96 deletions(-) diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py index 05c92e7e2..fba722648 100644 --- a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -112,6 +112,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): STTUpdateSettingsFrame(delta=DeepgramSageMakerSTTSettings(language=Language.ES)) ) + # Old-style dict update (for backward-compat testing): + # await asyncio.sleep(10) + # logger.info("Updating Deepgram SageMaker STT settings via dict: punctuate=False, filler_words=True") + # await task.queue_frame( + # STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True}) + # ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py index 39dde69e9..20068ab75 100644 --- a/examples/foundational/55a-update-settings-deepgram-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -106,6 +106,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES)) ) + # Old-style dict update (for backward-compat testing): + # await asyncio.sleep(10) + # logger.info("Updating Deepgram STT settings via dict: punctuate=False, filler_words=True") + # await task.queue_frame( + # 
STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True}) + # ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index aa5c2ce8d..768c4d6eb 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -6,7 +6,8 @@ """Deepgram speech-to-text service implementation.""" -from dataclasses import dataclass, field +import inspect +from dataclasses import dataclass, field, fields from typing import Any, AsyncGenerator, Dict, Optional from loguru import logger @@ -24,7 +25,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import DEEPGRAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -51,11 +52,32 @@ except ModuleNotFoundError as e: class DeepgramSTTSettings(STTSettings): """Settings for the Deepgram STT service. + Some commonly used ``LiveOptions`` fields are declared as top-level + fields here so they can be updated individually via + ``STTUpdateSettingsFrame``. Any *additional* ``LiveOptions`` fields + (e.g. ``filler_words``, ``diarize``, ``utterance_end_ms``) can be + passed through the ``extra`` dict — they will be forwarded to + ``LiveOptions`` when the WebSocket connection is (re)established. + This keeps the settings class future-proof: new Deepgram features work + without code changes on the Pipecat side. + Parameters: - live_options: Deepgram ``LiveOptions`` for detailed configuration. + encoding: Audio encoding format (e.g. ``"linear16"``). + channels: Number of audio channels. 
+ interim_results: Whether to return interim transcription results. + smart_format: Whether to enable Deepgram smart formatting. + punctuate: Whether to add punctuation to transcripts. + profanity_filter: Whether to filter profanity from transcripts. + vad_events: Whether to enable Deepgram VAD events (deprecated). """ - live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + interim_results: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + smart_format: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + punctuate: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class DeepgramSTTService(STTService): @@ -153,22 +175,29 @@ class DeepgramSTTService(STTService): if "language" in merged_options and isinstance(merged_options["language"], Language): merged_options["language"] = merged_options["language"].value - merged_live_options = LiveOptions(**merged_options) + settings_fields = {f.name for f in fields(DeepgramSTTSettings)} + settings_kwargs = {} + extra = {} + for key, value in merged_options.items(): + if key in settings_fields: + settings_kwargs[key] = value + else: + extra[key] = value + + settings = DeepgramSTTSettings(**settings_kwargs) + settings.extra = extra + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, - settings=DeepgramSTTSettings( - model=merged_options.get("model"), - language=merged_options.get("language"), - live_options=merged_live_options, - ), + settings=settings, **kwargs, ) self._addons = addons self._should_interrupt = should_interrupt - if merged_live_options.vad_events: + if self._settings.vad_events: import 
warnings with warnings.catch_warnings(): @@ -199,7 +228,7 @@ class DeepgramSTTService(STTService): Returns: True if VAD events are enabled in the current settings. """ - return self._settings.live_options.vad_events + return self._settings.vad_events def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -210,43 +239,12 @@ class DeepgramSTTService(STTService): return True async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: - """Apply a settings delta, keeping ``live_options`` in sync. - - Top-level ``model`` and ``language`` are the source of truth. When - they are given in *delta* their values are propagated into - ``live_options``. When only ``live_options`` is given, its ``model`` - and ``language`` are propagated *up* to the top-level fields. - - Any change triggers a WebSocket reconnect. - """ - # Determine which top-level fields are explicitly provided. - model_given = isinstance(delta, DeepgramSTTSettings) and is_given( - getattr(delta, "model", NOT_GIVEN) - ) - language_given = isinstance(delta, DeepgramSTTSettings) and is_given( - getattr(delta, "language", NOT_GIVEN) - ) - + """Apply a settings delta and reconnect if anything changed.""" changed = await super()._update_settings(delta) if not changed: return changed - # --- Sync model -------------------------------------------------- - if model_given: - # Top-level model wins → push into live_options. - self._settings.live_options.model = self._settings.model - elif "live_options" in changed and self._settings.live_options.model is not None: - # Only live_options was given → pull model up. 
- self._settings.model = self._settings.live_options.model - self._sync_model_name_to_metrics() - - # --- Sync language ----------------------------------------------- - if language_given: - self._settings.live_options.language = self._settings.language - elif "live_options" in changed and self._settings.live_options.language is not None: - self._settings.language = self._settings.live_options.language - await self._disconnect() await self._connect() @@ -259,7 +257,6 @@ class DeepgramSTTService(STTService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings.live_options.sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -292,6 +289,36 @@ class DeepgramSTTService(STTService): await self._connection.send(audio) yield None + def _build_live_options(self) -> LiveOptions: + """Build a ``LiveOptions`` from flat settings fields, sample rate, and extras. + + Returns: + A fully-populated ``LiveOptions`` ready for the Deepgram SDK. + """ + valid_kwargs = set(inspect.signature(LiveOptions.__init__).parameters) - {"self"} + + # Start with extras that are valid LiveOptions kwargs. + opts: dict[str, Any] = {k: v for k, v in self._settings.extra.items() if k in valid_kwargs} + + # Override with flat settings fields (these take precedence). 
+ s = self._settings + opts.update( + { + "model": s.model, + "language": s.language, + "encoding": s.encoding, + "channels": s.channels, + "interim_results": s.interim_results, + "smart_format": s.smart_format, + "punctuate": s.punctuate, + "profanity_filter": s.profanity_filter, + "vad_events": s.vad_events, + "sample_rate": self.sample_rate, + } + ) + + return LiveOptions(**opts) + async def _connect(self): logger.debug("Connecting to Deepgram") @@ -313,7 +340,7 @@ class DeepgramSTTService(STTService): ) if not await self._connection.start( - options=self._settings.live_options, addons=self._addons + options=self._build_live_options(), addons=self._addons ): await self.push_error(error_msg=f"Unable to connect to Deepgram") else: diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index bc5eebe37..97309ab93 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -13,8 +13,9 @@ languages, and various Deepgram features. """ import asyncio +import inspect import json -from dataclasses import dataclass, field +from dataclasses import dataclass, field, fields from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -32,7 +33,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -53,11 +54,32 @@ except ModuleNotFoundError as e: class DeepgramSageMakerSTTSettings(STTSettings): """Settings for the Deepgram SageMaker STT service. 
+ Some commonly used ``LiveOptions`` fields are declared as top-level + fields here so they can be updated individually via + ``STTUpdateSettingsFrame``. Any *additional* ``LiveOptions`` fields + (e.g. ``filler_words``, ``diarize``, ``utterance_end_ms``) can be + passed through the ``extra`` dict — they will be forwarded to + ``LiveOptions`` when the connection is (re)established. This keeps the + settings class future-proof: new Deepgram features work without code + changes on the Pipecat side. + Parameters: - live_options: Deepgram ``LiveOptions`` for detailed configuration. + encoding: Audio encoding format (e.g. ``"linear16"``). + channels: Number of audio channels. + interim_results: Whether to return interim transcription results. + smart_format: Whether to enable Deepgram smart formatting. + punctuate: Whether to add punctuation to transcripts. + profanity_filter: Whether to filter profanity from transcripts. + vad_events: Whether to enable Deepgram VAD events (deprecated). """ - live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + interim_results: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + smart_format: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + punctuate: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class DeepgramSageMakerSTTService(STTService): @@ -139,15 +161,22 @@ class DeepgramSageMakerSTTService(STTService): if "language" in merged_options and isinstance(merged_options["language"], Language): merged_options["language"] = merged_options["language"].value - merged_live_options = LiveOptions(**merged_options) + settings_fields = 
{f.name for f in fields(DeepgramSageMakerSTTSettings)} + settings_kwargs = {} + extra = {} + for key, value in merged_options.items(): + if key in settings_fields: + settings_kwargs[key] = value + else: + extra[key] = value + + settings = DeepgramSageMakerSTTSettings(**settings_kwargs) + settings.extra = extra + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, - settings=DeepgramSageMakerSTTSettings( - model=merged_options.get("model"), - language=merged_options.get("language"), - live_options=merged_live_options, - ), + settings=settings, **kwargs, ) @@ -167,46 +196,12 @@ class DeepgramSageMakerSTTService(STTService): return True async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: - """Apply a settings delta, keeping ``live_options`` in sync. - - Top-level ``model`` and ``language`` are the source of truth. When - they are given in *delta* their values are propagated into - ``live_options``. When only ``live_options`` is given, its ``model`` - and ``language`` are propagated *up* to the top-level fields. - - Any change triggers a reconnect. - """ - # Determine which top-level fields are explicitly provided. - model_given = isinstance(delta, DeepgramSageMakerSTTSettings) and is_given( - getattr(delta, "model", NOT_GIVEN) - ) - language_given = isinstance(delta, DeepgramSageMakerSTTSettings) and is_given( - getattr(delta, "language", NOT_GIVEN) - ) - + """Apply a settings delta and warn about unhandled changes.""" changed = await super()._update_settings(delta) if not changed: return changed - # --- Sync model -------------------------------------------------- - if model_given: - # Top-level model wins → push into live_options. - self._settings.live_options.model = self._settings.model - elif "live_options" in changed and self._settings.live_options.model is not None: - # Only live_options was given → pull model up. 
- self._settings.model = self._settings.live_options.model - self._sync_model_name_to_metrics() - - # --- Sync language ----------------------------------------------- - if language_given: - lang = self._settings.language - if isinstance(lang, Language): - lang = lang.value - self._settings.live_options.language = lang - elif "live_options" in changed and self._settings.live_options.language is not None: - self._settings.language = self._settings.live_options.language - # TODO: someday we could reconnect here to apply updated settings. # Code might look something like the below: # await self._disconnect() @@ -223,7 +218,6 @@ class DeepgramSageMakerSTTService(STTService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings.live_options.sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -260,6 +254,36 @@ class DeepgramSageMakerSTTService(STTService): yield ErrorFrame(error=f"Unknown error occurred: {e}") yield None + def _build_live_options(self) -> LiveOptions: + """Build a ``LiveOptions`` from flat settings fields, sample rate, and extras. + + Returns: + A fully-populated ``LiveOptions`` ready for the Deepgram SDK. + """ + valid_kwargs = set(inspect.signature(LiveOptions.__init__).parameters) - {"self"} + + # Start with extras that are valid LiveOptions kwargs. + opts: dict[str, Any] = {k: v for k, v in self._settings.extra.items() if k in valid_kwargs} + + # Override with flat settings fields (these take precedence). 
+ s = self._settings + opts.update( + { + "model": s.model, + "language": s.language, + "encoding": s.encoding, + "channels": s.channels, + "interim_results": s.interim_results, + "smart_format": s.smart_format, + "punctuate": s.punctuate, + "profanity_filter": s.profanity_filter, + "vad_events": s.vad_events, + "sample_rate": self.sample_rate, + } + ) + + return LiveOptions(**opts) + async def _connect(self): """Connect to the SageMaker endpoint and start the BiDi session. @@ -269,12 +293,9 @@ class DeepgramSageMakerSTTService(STTService): """ logger.debug("Connecting to Deepgram on SageMaker...") - # Update sample rate in live_options - self._settings.live_options.sample_rate = self.sample_rate - # Build query string from live_options, converting booleans to strings query_params = {} - for key, value in self._settings.live_options.to_dict().items(): + for key, value in self._build_live_options().to_dict().items(): if value is not None: # Convert boolean values to lowercase strings for Deepgram API if isinstance(value, bool): From 8b6aa4b912958ceaa619192fb779e9c20897b037 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Feb 2026 18:18:47 -0500 Subject: [PATCH 113/189] Unflatten `LiveOptions` back into a single `live_options` field on `DeepgramSTTSettings` and `DeepgramSageMakerSTTSettings`; add `apply_update` override with delta-merge semantics and `from_mapping` override for backward-compatible dict-style updates --- ...-update-settings-deepgram-sagemaker-stt.py | 11 +- .../55a-update-settings-deepgram-stt.py | 11 +- src/pipecat/services/deepgram/stt.py | 201 +++++++++++------ .../services/deepgram/stt_sagemaker.py | 196 ++++++++++------ tests/test_settings.py | 211 ++++++++++++++++++ 5 files changed, 496 insertions(+), 134 deletions(-) diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py index fba722648..e8094183a 100644 --- 
a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -7,6 +7,7 @@ import asyncio import os +from deepgram import LiveOptions from dotenv import load_dotenv from loguru import logger @@ -106,10 +107,16 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) + # NOTE: after this change, the bot will only respond if you speak Spanish await asyncio.sleep(10) - logger.info("Updating Deepgram SageMaker STT settings: language=es") + logger.info("Updating Deepgram SageMaker STT settings: language=es, punctuate=False") await task.queue_frame( - STTUpdateSettingsFrame(delta=DeepgramSageMakerSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame( + delta=DeepgramSageMakerSTTSettings( + language=Language.ES, + live_options=LiveOptions(punctuate=False), + ) + ) ) # Old-style dict update (for backward-compat testing): diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py index 20068ab75..8808f6f4c 100644 --- a/examples/foundational/55a-update-settings-deepgram-stt.py +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -7,6 +7,7 @@ import asyncio import os +from deepgram import LiveOptions from dotenv import load_dotenv from loguru import logger @@ -100,10 +101,16 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) + # NOTE: after this change, the bot will only respond if you speak Spanish await asyncio.sleep(10) - logger.info("Updating Deepgram STT settings: language=es") + logger.info("Updating Deepgram STT settings: language=es, punctuate=False") await task.queue_frame( - 
STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES)) + STTUpdateSettingsFrame( + delta=DeepgramSTTSettings( + language=Language.ES, + live_options=LiveOptions(punctuate=False), + ) + ) ) # Old-style dict update (for backward-compat testing): diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 768c4d6eb..f1c2fd19c 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -7,8 +7,8 @@ """Deepgram speech-to-text service implementation.""" import inspect -from dataclasses import dataclass, field, fields -from typing import Any, AsyncGenerator, Dict, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Dict, Mapping, Optional, Type from loguru import logger @@ -25,7 +25,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven +from pipecat.services.settings import _S, NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import DEEPGRAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -52,32 +52,117 @@ except ModuleNotFoundError as e: class DeepgramSTTSettings(STTSettings): """Settings for the Deepgram STT service. - Some commonly used ``LiveOptions`` fields are declared as top-level - fields here so they can be updated individually via - ``STTUpdateSettingsFrame``. Any *additional* ``LiveOptions`` fields - (e.g. ``filler_words``, ``diarize``, ``utterance_end_ms``) can be - passed through the ``extra`` dict — they will be forwarded to - ``LiveOptions`` when the WebSocket connection is (re)established. - This keeps the settings class future-proof: new Deepgram features work - without code changes on the Pipecat side. + Wraps the Deepgram SDK's ``LiveOptions`` in a single ``live_options`` + field. 
All Deepgram-specific options (``filler_words``, ``diarize``, + ``utterance_end_ms``, etc.) should be passed directly via + ``LiveOptions``. + + In **delta mode** (i.e. when carried by ``STTUpdateSettingsFrame``), + ``live_options`` is treated as a **delta** — its non-None fields are + merged into the stored ``LiveOptions``, not replaced wholesale. For + example, ``DeepgramSTTSettings(live_options=LiveOptions(punctuate=False))`` + changes only ``punctuate`` and leaves all other options intact. Parameters: - encoding: Audio encoding format (e.g. ``"linear16"``). - channels: Number of audio channels. - interim_results: Whether to return interim transcription results. - smart_format: Whether to enable Deepgram smart formatting. - punctuate: Whether to add punctuation to transcripts. - profanity_filter: Whether to filter profanity from transcripts. - vad_events: Whether to enable Deepgram VAD events (deprecated). + live_options: Deepgram ``LiveOptions`` for STT configuration. + In delta mode only its non-None fields are merged into the + stored options. """ - encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - interim_results: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - smart_format: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - punctuate: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - profanity_filter: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - vad_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + # Valid LiveOptions __init__ parameter names (cached at class level). 
+ _live_options_params: set[str] | None = field(default=None, init=False, repr=False) + + @classmethod + def _get_live_options_params(cls) -> set[str]: + """Return the set of valid ``LiveOptions.__init__`` parameter names.""" + if cls._live_options_params is None: + cls._live_options_params = set(inspect.signature(LiveOptions.__init__).parameters) - { + "self" + } + return cls._live_options_params + + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + """Merge a delta into this store, with delta-merge for ``live_options``. + + ``live_options`` is merged field-by-field (non-None fields from the + delta overwrite corresponding fields in the stored options) rather + than being replaced wholesale. + + ``model`` and ``language`` are kept in sync bidirectionally between + the top-level settings fields and ``live_options``. + """ + # Pull live_options out of the delta so super() doesn't replace it. + delta_lo = getattr(delta, "live_options", NOT_GIVEN) + if is_given(delta_lo): + delta.live_options = NOT_GIVEN # type: ignore[assignment] + + # Let the base class handle model, language, extra. + changed = super().apply_update(delta) + + # Sync top-level model/language changes into stored live_options. + if "model" in changed: + self.live_options.model = self.model # type: ignore[union-attr] + if "language" in changed: + self.live_options.language = self.language # type: ignore[union-attr] + + # Merge live_options delta. + if is_given(delta_lo): + old_dict = self.live_options.to_dict() # type: ignore[union-attr] + delta_dict = delta_lo.to_dict() + + if delta_dict: + merged = {**old_dict, **delta_dict} + self.live_options = LiveOptions(**merged) + + for key in delta_dict: + old_val = old_dict.get(key, NOT_GIVEN) + if old_val != delta_dict[key]: + changed[key] = old_val + + # Sync model/language from live_options delta to top-level. 
+ if "model" in delta_dict and delta_dict["model"] != self.model: + changed.setdefault("model", self.model) + self.model = delta_dict["model"] + if "language" in delta_dict and delta_dict["language"] != self.language: + changed.setdefault("language", self.language) + self.language = delta_dict["language"] + + return changed + + @classmethod + def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + """Build a delta from a plain dict, routing LiveOptions keys correctly. + + Keys that are valid ``LiveOptions.__init__`` parameters (and not + top-level ``STTSettings`` fields like ``model`` / ``language``) are + collected into a ``LiveOptions`` object. ``model`` and ``language`` + are routed to the top-level settings fields. Truly unknown keys go + to ``extra``. + """ + lo_params = cls._get_live_options_params() + stt_field_names = {"model", "language"} + + kwargs: Dict[str, Any] = {} + lo_kwargs: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + + for key, value in settings.items(): + canonical = cls._aliases.get(key, key) + if canonical in stt_field_names: + kwargs[canonical] = value + elif canonical in lo_params: + lo_kwargs[canonical] = value + else: + extra[key] = value + + if lo_kwargs: + kwargs["live_options"] = LiveOptions(**lo_kwargs) + + instance = cls(**kwargs) + instance.extra = extra + return instance class DeepgramSTTService(STTService): @@ -124,7 +209,9 @@ class DeepgramSTTService(STTService): base_url: Custom Deepgram API base URL. sample_rate: Audio sample rate. If None, uses default or live_options value. - live_options: Deepgram LiveOptions for detailed configuration. + live_options: Deepgram LiveOptions configuration. Treated as a + delta from a set of sensible defaults — only the fields you + set are overridden; all others keep their default values. addons: Additional Deepgram features to enable. 
should_interrupt: Determine whether the bot should be interrupted when Deepgram VAD events are enabled and the system detects that the user is speaking. @@ -163,29 +250,26 @@ class DeepgramSTTService(STTService): vad_events=False, ) - merged_options = default_options.to_dict() + merged_dict = default_options.to_dict() if live_options: default_model = default_options.model - merged_options.update(live_options.to_dict()) - # NOTE(aleix): Fixes an in deepgram-sdk where `model` is initialized + merged_dict.update(live_options.to_dict()) + # NOTE(aleix): Fixes a bug in deepgram-sdk where `model` is initialized # to the string "None" instead of the value `None`. - if "model" in merged_options and merged_options["model"] == "None": - merged_options["model"] = default_model + if "model" in merged_dict and merged_dict["model"] == "None": + merged_dict["model"] = default_model - if "language" in merged_options and isinstance(merged_options["language"], Language): - merged_options["language"] = merged_options["language"].value + if "language" in merged_dict and isinstance(merged_dict["language"], Language): + merged_dict["language"] = merged_dict["language"].value - settings_fields = {f.name for f in fields(DeepgramSTTSettings)} - settings_kwargs = {} - extra = {} - for key, value in merged_options.items(): - if key in settings_fields: - settings_kwargs[key] = value - else: - extra[key] = value + # Extract model/language for top-level STTSettings fields; everything + # else lives inside LiveOptions. 
+ model = merged_dict.pop("model", None) + language = merged_dict.pop("language", None) - settings = DeepgramSTTSettings(**settings_kwargs) - settings.extra = extra + settings = DeepgramSTTSettings( + model=model, language=language, live_options=LiveOptions(**merged_dict) + ) super().__init__( sample_rate=sample_rate, @@ -197,7 +281,7 @@ class DeepgramSTTService(STTService): self._addons = addons self._should_interrupt = should_interrupt - if self._settings.vad_events: + if self._settings.live_options.vad_events: import warnings with warnings.catch_warnings(): @@ -228,7 +312,7 @@ class DeepgramSTTService(STTService): Returns: True if VAD events are enabled in the current settings. """ - return self._settings.vad_events + return self._settings.live_options.vad_events def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -290,32 +374,17 @@ class DeepgramSTTService(STTService): yield None def _build_live_options(self) -> LiveOptions: - """Build a ``LiveOptions`` from flat settings fields, sample rate, and extras. + """Build a ``LiveOptions`` from stored settings and sample rate. Returns: A fully-populated ``LiveOptions`` ready for the Deepgram SDK. """ - valid_kwargs = set(inspect.signature(LiveOptions.__init__).parameters) - {"self"} + opts: dict[str, Any] = self._settings.live_options.to_dict() - # Start with extras that are valid LiveOptions kwargs. - opts: dict[str, Any] = {k: v for k, v in self._settings.extra.items() if k in valid_kwargs} - - # Override with flat settings fields (these take precedence). 
- s = self._settings - opts.update( - { - "model": s.model, - "language": s.language, - "encoding": s.encoding, - "channels": s.channels, - "interim_results": s.interim_results, - "smart_format": s.smart_format, - "punctuate": s.punctuate, - "profanity_filter": s.profanity_filter, - "vad_events": s.vad_events, - "sample_rate": self.sample_rate, - } - ) + # Overlay model/language from top-level settings and sample_rate from service. + opts["model"] = self._settings.model + opts["language"] = self._settings.language + opts["sample_rate"] = self.sample_rate return LiveOptions(**opts) diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 97309ab93..12357a8cd 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -15,8 +15,8 @@ languages, and various Deepgram features. import asyncio import inspect import json -from dataclasses import dataclass, field, fields -from typing import Any, AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Dict, Mapping, Optional, Type from loguru import logger @@ -33,7 +33,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient -from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven +from pipecat.services.settings import _S, NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -54,32 +54,117 @@ except ModuleNotFoundError as e: class DeepgramSageMakerSTTSettings(STTSettings): """Settings for the Deepgram SageMaker STT service. 
- Some commonly used ``LiveOptions`` fields are declared as top-level - fields here so they can be updated individually via - ``STTUpdateSettingsFrame``. Any *additional* ``LiveOptions`` fields - (e.g. ``filler_words``, ``diarize``, ``utterance_end_ms``) can be - passed through the ``extra`` dict — they will be forwarded to - ``LiveOptions`` when the connection is (re)established. This keeps the - settings class future-proof: new Deepgram features work without code - changes on the Pipecat side. + Wraps the Deepgram SDK's ``LiveOptions`` in a single ``live_options`` + field. All Deepgram-specific options (``filler_words``, ``diarize``, + ``utterance_end_ms``, etc.) should be passed directly via + ``LiveOptions``. + + In **delta mode** (i.e. when carried by ``STTUpdateSettingsFrame``), + ``live_options`` is treated as a **delta** — its non-None fields are + merged into the stored ``LiveOptions``, not replaced wholesale. For + example, ``DeepgramSageMakerSTTSettings(live_options=LiveOptions(punctuate=False))`` + changes only ``punctuate`` and leaves all other options intact. Parameters: - encoding: Audio encoding format (e.g. ``"linear16"``). - channels: Number of audio channels. - interim_results: Whether to return interim transcription results. - smart_format: Whether to enable Deepgram smart formatting. - punctuate: Whether to add punctuation to transcripts. - profanity_filter: Whether to filter profanity from transcripts. - vad_events: Whether to enable Deepgram VAD events (deprecated). + live_options: Deepgram ``LiveOptions`` for STT configuration. + In delta mode only its non-None fields are merged into the + stored options. 
""" - encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - interim_results: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - smart_format: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - punctuate: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - profanity_filter: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - vad_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + # Valid LiveOptions __init__ parameter names (cached at class level). + _live_options_params: set[str] | None = field(default=None, init=False, repr=False) + + @classmethod + def _get_live_options_params(cls) -> set[str]: + """Return the set of valid ``LiveOptions.__init__`` parameter names.""" + if cls._live_options_params is None: + cls._live_options_params = set(inspect.signature(LiveOptions.__init__).parameters) - { + "self" + } + return cls._live_options_params + + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + """Merge a delta into this store, with delta-merge for ``live_options``. + + ``live_options`` is merged field-by-field (non-None fields from the + delta overwrite corresponding fields in the stored options) rather + than being replaced wholesale. + + ``model`` and ``language`` are kept in sync bidirectionally between + the top-level settings fields and ``live_options``. + """ + # Pull live_options out of the delta so super() doesn't replace it. + delta_lo = getattr(delta, "live_options", NOT_GIVEN) + if is_given(delta_lo): + delta.live_options = NOT_GIVEN # type: ignore[assignment] + + # Let the base class handle model, language, extra. + changed = super().apply_update(delta) + + # Sync top-level model/language changes into stored live_options. 
+ if "model" in changed: + self.live_options.model = self.model # type: ignore[union-attr] + if "language" in changed: + self.live_options.language = self.language # type: ignore[union-attr] + + # Merge live_options delta. + if is_given(delta_lo): + old_dict = self.live_options.to_dict() # type: ignore[union-attr] + delta_dict = delta_lo.to_dict() + + if delta_dict: + merged = {**old_dict, **delta_dict} + self.live_options = LiveOptions(**merged) + + for key in delta_dict: + old_val = old_dict.get(key, NOT_GIVEN) + if old_val != delta_dict[key]: + changed[key] = old_val + + # Sync model/language from live_options delta to top-level. + if "model" in delta_dict and delta_dict["model"] != self.model: + changed.setdefault("model", self.model) + self.model = delta_dict["model"] + if "language" in delta_dict and delta_dict["language"] != self.language: + changed.setdefault("language", self.language) + self.language = delta_dict["language"] + + return changed + + @classmethod + def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + """Build a delta from a plain dict, routing LiveOptions keys correctly. + + Keys that are valid ``LiveOptions.__init__`` parameters (and not + top-level ``STTSettings`` fields like ``model`` / ``language``) are + collected into a ``LiveOptions`` object. ``model`` and ``language`` + are routed to the top-level settings fields. Truly unknown keys go + to ``extra``. 
+ """ + lo_params = cls._get_live_options_params() + stt_field_names = {"model", "language"} + + kwargs: Dict[str, Any] = {} + lo_kwargs: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + + for key, value in settings.items(): + canonical = cls._aliases.get(key, key) + if canonical in stt_field_names: + kwargs[canonical] = value + elif canonical in lo_params: + lo_kwargs[canonical] = value + else: + extra[key] = value + + if lo_kwargs: + kwargs["live_options"] = LiveOptions(**lo_kwargs) + + instance = cls(**kwargs) + instance.extra = extra + return instance class DeepgramSageMakerSTTService(STTService): @@ -130,8 +215,9 @@ class DeepgramSageMakerSTTService(STTService): region: AWS region where the endpoint is deployed (e.g., "us-east-2"). sample_rate: Audio sample rate in Hz. If None, uses value from live_options or defaults to the value from StartFrame. - live_options: Deepgram LiveOptions for detailed configuration. If None, - uses sensible defaults (nova-3 model, English, interim results enabled). + live_options: Deepgram LiveOptions configuration. Treated as a + delta from a set of sensible defaults — only the fields you + set are overridden; all others keep their default values. ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the parent STTService. 
@@ -149,29 +235,26 @@ class DeepgramSageMakerSTTService(STTService): ) # Merge with provided options - merged_options = default_options.to_dict() + merged_dict = default_options.to_dict() if live_options: default_model = default_options.model - merged_options.update(live_options.to_dict()) + merged_dict.update(live_options.to_dict()) # Handle the "None" string bug from deepgram-sdk - if "model" in merged_options and merged_options["model"] == "None": - merged_options["model"] = default_model + if "model" in merged_dict and merged_dict["model"] == "None": + merged_dict["model"] = default_model # Convert Language enum to string if needed - if "language" in merged_options and isinstance(merged_options["language"], Language): - merged_options["language"] = merged_options["language"].value + if "language" in merged_dict and isinstance(merged_dict["language"], Language): + merged_dict["language"] = merged_dict["language"].value - settings_fields = {f.name for f in fields(DeepgramSageMakerSTTSettings)} - settings_kwargs = {} - extra = {} - for key, value in merged_options.items(): - if key in settings_fields: - settings_kwargs[key] = value - else: - extra[key] = value + # Extract model/language for top-level STTSettings fields; everything + # else lives inside LiveOptions. + model = merged_dict.pop("model", None) + language = merged_dict.pop("language", None) - settings = DeepgramSageMakerSTTSettings(**settings_kwargs) - settings.extra = extra + settings = DeepgramSageMakerSTTSettings( + model=model, language=language, live_options=LiveOptions(**merged_dict) + ) super().__init__( sample_rate=sample_rate, @@ -255,32 +338,17 @@ class DeepgramSageMakerSTTService(STTService): yield None def _build_live_options(self) -> LiveOptions: - """Build a ``LiveOptions`` from flat settings fields, sample rate, and extras. + """Build a ``LiveOptions`` from stored settings and sample rate. Returns: A fully-populated ``LiveOptions`` ready for the Deepgram SDK. 
""" - valid_kwargs = set(inspect.signature(LiveOptions.__init__).parameters) - {"self"} + opts: dict[str, Any] = self._settings.live_options.to_dict() - # Start with extras that are valid LiveOptions kwargs. - opts: dict[str, Any] = {k: v for k, v in self._settings.extra.items() if k in valid_kwargs} - - # Override with flat settings fields (these take precedence). - s = self._settings - opts.update( - { - "model": s.model, - "language": s.language, - "encoding": s.encoding, - "channels": s.channels, - "interim_results": s.interim_results, - "smart_format": s.smart_format, - "punctuate": s.punctuate, - "profanity_filter": s.profanity_filter, - "vad_events": s.vad_events, - "sample_rate": self.sample_rate, - } - ) + # Overlay model/language from top-level settings and sample_rate from service. + opts["model"] = self._settings.model + opts["language"] = self._settings.language + opts["sample_rate"] = self.sample_rate return LiveOptions(**opts) diff --git a/tests/test_settings.py b/tests/test_settings.py index 85f89987c..3201e3c24 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -7,7 +7,10 @@ """Tests for the typed settings infrastructure in pipecat.services.settings.""" import pytest +from deepgram import LiveOptions +from pipecat.services.deepgram.stt import DeepgramSTTSettings +from pipecat.services.deepgram.stt_sagemaker import DeepgramSageMakerSTTSettings from pipecat.services.settings import ( NOT_GIVEN, LLMSettings, @@ -311,3 +314,211 @@ class TestRoundtrip: assert changed["model"] == "gpt-4o" assert current.model == "gpt-4o-mini" assert current.temperature == 0.9 + + +# --------------------------------------------------------------------------- +# DeepgramSTTSettings: live_options delta merge +# --------------------------------------------------------------------------- + + +class TestDeepgramSTTSettingsApplyUpdate: + def _make_store(self, **lo_kwargs) -> DeepgramSTTSettings: + """Helper to build a store-mode DeepgramSTTSettings.""" + 
defaults = dict( + encoding="linear16", + channels=1, + interim_results=True, + smart_format=False, + punctuate=True, + profanity_filter=True, + vad_events=False, + ) + defaults.update(lo_kwargs) + s = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions(**defaults), + ) + return s + + def test_apply_update_merges_live_options_as_delta(self): + """Only the given fields in the delta LiveOptions are merged.""" + current = self._make_store() + assert current.live_options.punctuate is True + + delta = DeepgramSTTSettings(live_options=LiveOptions(punctuate=False)) + changed = current.apply_update(delta) + + assert current.live_options.punctuate is False + assert "punctuate" in changed + # Other fields are untouched + assert current.live_options.encoding == "linear16" + assert current.live_options.channels == 1 + + def test_apply_update_syncs_model_from_live_options_to_top_level(self): + """model inside live_options delta should sync to top-level model.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings(live_options=LiveOptions(model="nova-2")) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert "model" in changed + + def test_apply_update_syncs_language_from_live_options_to_top_level(self): + """language inside live_options delta should sync to top-level language.""" + current = self._make_store() + assert current.language == "en" + + delta = DeepgramSTTSettings(live_options=LiveOptions(language="es")) + changed = current.apply_update(delta) + + assert current.language == "es" + assert "language" in changed + + def test_apply_update_syncs_top_level_model_into_live_options(self): + """Top-level model change should propagate into stored live_options.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings(model="nova-2") + changed = current.apply_update(delta) + + assert current.model == 
"nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + def test_apply_update_syncs_top_level_language_into_live_options(self): + """Top-level language change should propagate into stored live_options.""" + current = self._make_store() + + delta = DeepgramSTTSettings(language="fr") + changed = current.apply_update(delta) + + assert current.language == "fr" + assert current.live_options.language == "fr" + assert "language" in changed + + def test_apply_update_no_change(self): + """Delta with same values should report no changes.""" + current = self._make_store() + delta = DeepgramSTTSettings(live_options=LiveOptions(punctuate=True)) + changed = current.apply_update(delta) + assert changed == {} + + +class TestDeepgramSTTSettingsFromMapping: + def test_routes_live_options_kwargs(self): + """LiveOptions-valid keys should be collected into live_options.""" + delta = DeepgramSTTSettings.from_mapping({"punctuate": False, "filler_words": True}) + assert is_given(delta.live_options) + assert delta.live_options.punctuate is False + assert delta.live_options.filler_words is True + + def test_routes_model_and_language_to_top_level(self): + """model and language should be top-level fields, not in live_options.""" + delta = DeepgramSTTSettings.from_mapping({"model": "nova-2", "language": "es"}) + assert delta.model == "nova-2" + assert delta.language == "es" + assert not is_given(delta.live_options) + + def test_unknown_keys_go_to_extra(self): + """Keys that aren't LiveOptions params or STT fields go to extra.""" + delta = DeepgramSTTSettings.from_mapping({"unknown_param": 42}) + assert delta.extra == {"unknown_param": 42} + assert not is_given(delta.live_options) + + def test_mixed_keys(self): + """model + LiveOptions keys + unknown keys are routed correctly.""" + delta = DeepgramSTTSettings.from_mapping( + {"model": "nova-2", "punctuate": False, "unknown": "val"} + ) + assert delta.model == "nova-2" + assert delta.live_options.punctuate is 
False + assert delta.extra == {"unknown": "val"} + + def test_roundtrip_from_mapping_apply_update(self): + """Simulate dict-style update: from_mapping -> apply_update.""" + current = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions( + encoding="linear16", + channels=1, + interim_results=True, + punctuate=True, + profanity_filter=True, + vad_events=False, + ), + ) + + raw = {"punctuate": False, "filler_words": True} + delta = DeepgramSTTSettings.from_mapping(raw) + changed = current.apply_update(delta) + + assert current.live_options.punctuate is False + assert current.live_options.filler_words is True + # Unchanged fields stay put + assert current.live_options.encoding == "linear16" + assert current.model == "nova-3-general" + assert "punctuate" in changed + + def test_roundtrip_model_via_dict(self): + """Dict update with model should change top-level and NOT create live_options.""" + current = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions(encoding="linear16", channels=1), + ) + + raw = {"model": "nova-2"} + delta = DeepgramSTTSettings.from_mapping(raw) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + +# --------------------------------------------------------------------------- +# DeepgramSageMakerSTTSettings: same pattern +# --------------------------------------------------------------------------- + + +class TestDeepgramSageMakerSTTSettingsApplyUpdate: + def _make_store(self, **lo_kwargs) -> DeepgramSageMakerSTTSettings: + defaults = dict( + encoding="linear16", + channels=1, + interim_results=True, + punctuate=True, + ) + defaults.update(lo_kwargs) + return DeepgramSageMakerSTTSettings( + model="nova-3", + language="en", + live_options=LiveOptions(**defaults), + ) + + def test_apply_update_merges_live_options_as_delta(self): + current = self._make_store() + delta = 
DeepgramSageMakerSTTSettings(live_options=LiveOptions(punctuate=False)) + changed = current.apply_update(delta) + assert current.live_options.punctuate is False + assert "punctuate" in changed + assert current.live_options.encoding == "linear16" + + def test_apply_update_syncs_model_from_live_options(self): + current = self._make_store() + delta = DeepgramSageMakerSTTSettings(live_options=LiveOptions(model="nova-2")) + current.apply_update(delta) + assert current.model == "nova-2" + + def test_from_mapping_routes_correctly(self): + delta = DeepgramSageMakerSTTSettings.from_mapping( + {"model": "nova-2", "punctuate": False, "unknown": "val"} + ) + assert delta.model == "nova-2" + assert delta.live_options.punctuate is False + assert delta.extra == {"unknown": "val"} From e21e8585f07964a6a1de114b259a7fd3a88cba53 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Feb 2026 18:40:44 -0500 Subject: [PATCH 114/189] Add `deepgram` and `sagemaker` extras to CI test dependencies so Deepgram and Deepgram Sagemaker settings tests can run --- .github/workflows/coverage.yaml | 2 ++ .github/workflows/tests.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index d65841a7d..26d03861b 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -37,10 +37,12 @@ jobs: uv sync --group dev \ --extra anthropic \ --extra aws \ + --extra deepgram \ --extra google \ --extra langchain \ --extra livekit \ --extra piper \ + --extra sagemaker \ --extra tracing \ --extra websocket diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a36a2fbd0..b22d502c4 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -41,10 +41,12 @@ jobs: uv sync --group dev \ --extra anthropic \ --extra aws \ + --extra deepgram \ --extra google \ --extra langchain \ --extra livekit \ --extra piper \ + --extra sagemaker \ --extra tracing \ --extra websocket From 
f7434cdde1a1ca90e3ff44bca9883908fcc06b1c Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 10 Feb 2026 13:54:57 -0500 Subject: [PATCH 115/189] Add text aggregation time metric for TTS sentence aggregation Add TextAggregationMetricsData measuring the time from the first LLM token to the first complete sentence, representing the latency cost of sentence aggregation in the TTS pipeline. --- changelog/3696.added.md | 1 + src/pipecat/metrics/metrics.py | 13 ++++++++++ src/pipecat/processors/frame_processor.py | 12 +++++++++ .../metrics/frame_processor_metrics.py | 25 +++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 changelog/3696.added.md diff --git a/changelog/3696.added.md b/changelog/3696.added.md new file mode 100644 index 000000000..39726d930 --- /dev/null +++ b/changelog/3696.added.md @@ -0,0 +1 @@ +- Added `TextAggregationMetricsData` metric measuring the time from the first LLM token to the first complete sentence, representing the latency cost of sentence aggregation in the TTS pipeline. diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index ccf30227a..2030306e5 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -87,6 +87,19 @@ class TTSUsageMetricsData(MetricsData): value: int +class TextAggregationMetricsData(MetricsData): + """Text aggregation time metrics data. + + Measures the time from the first LLM token to the first complete sentence, + representing the latency cost of sentence aggregation in the TTS pipeline. + + Parameters: + value: Aggregation time in seconds. + """ + + value: float + + class TurnMetricsData(MetricsData): """Metrics data for turn detection predictions. 
diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index bcdb2d57b..37e8dc10d 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -485,6 +485,18 @@ class FrameProcessor(BaseObject): if frame: await self.push_frame(frame) + async def start_text_aggregation_metrics(self): + """Start text aggregation time metrics collection.""" + if self.can_generate_metrics() and self.metrics_enabled: + await self._metrics.start_text_aggregation_metrics() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation time metrics collection and push results.""" + if self.can_generate_metrics() and self.metrics_enabled: + frame = await self._metrics.stop_text_aggregation_metrics() + if frame: + await self.push_frame(frame) + async def stop_all_metrics(self): """Stop all active metrics collection.""" await self.stop_ttfb_metrics() diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index c82fd9698..cb5bc8a42 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -17,6 +17,7 @@ from pipecat.metrics.metrics import ( LLMUsageMetricsData, MetricsData, ProcessingMetricsData, + TextAggregationMetricsData, TTFBMetricsData, TTSUsageMetricsData, ) @@ -211,3 +212,27 @@ class FrameProcessorMetrics(BaseObject): ) logger.debug(f"{self._processor_name()} usage characters: {characters.value}") return MetricsFrame(data=[characters]) + + async def start_text_aggregation_metrics(self): + """Start measuring text aggregation time (first token to first sentence).""" + self._start_text_aggregation_time = time.time() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation measurement and generate metrics frame. + + Returns: + MetricsFrame containing text aggregation time, or None if not measuring. 
+ """ + if ( + not hasattr(self, "_start_text_aggregation_time") + or self._start_text_aggregation_time == 0 + ): + return None + + value = time.time() - self._start_text_aggregation_time + logger.debug(f"{self._processor_name()} text aggregation time: {value}") + aggregation = TextAggregationMetricsData( + processor=self._processor_name(), value=value, model=self._model_name() + ) + self._start_text_aggregation_time = 0 + return MetricsFrame(data=[aggregation]) From d69a337def945729f10fd444d29f1d21203cef8a Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 10 Feb 2026 13:55:19 -0500 Subject: [PATCH 116/189] Add text_aggregation_mode parameter to TTSService Move the sentence vs token aggregation concern into text aggregators so all text flows through them regardless of mode. This enables pattern detection and tag handling to work in TOKEN mode. - Add TextAggregationMode enum (SENTENCE, TOKEN) as the user-facing TTS setting, separate from the internal AggregationType - Add TOKEN mode support to Simple, SkipTags, and PatternPair aggregators - Add text_aggregation_mode parameter to TTSService and all TTS subclasses - Deprecate aggregate_sentences in favor of text_aggregation_mode - Merge TTSService._process_text_frame() into a single codepath --- changelog/3696.changed.md | 1 + changelog/3696.deprecated.md | 1 + examples/foundational/07-interruptible.py | 2 + src/pipecat/frames/frames.py | 12 +- src/pipecat/services/asyncai/tts.py | 13 +- src/pipecat/services/azure/tts.py | 15 +- src/pipecat/services/cartesia/tts.py | 33 +++-- src/pipecat/services/elevenlabs/tts.py | 28 +++- src/pipecat/services/inworld/tts.py | 13 +- src/pipecat/services/neuphonic/tts.py | 13 +- src/pipecat/services/rime/tts.py | 29 +++- src/pipecat/services/sarvam/tts.py | 13 +- src/pipecat/services/tts_service.py | 130 ++++++++++++++---- .../utils/text/base_text_aggregator.py | 20 +++ .../utils/text/pattern_pair_aggregator.py | 41 ++++-- .../utils/text/simple_text_aggregator.py | 32 +++-- 
.../utils/text/skip_tags_aggregator.py | 29 +++- tests/test_pattern_pair_aggregator.py | 61 ++++++++ tests/test_simple_text_aggregator.py | 34 +++++ tests/test_skip_tags_aggregator.py | 55 ++++++++ 20 files changed, 480 insertions(+), 95 deletions(-) create mode 100644 changelog/3696.changed.md create mode 100644 changelog/3696.deprecated.md diff --git a/changelog/3696.changed.md b/changelog/3696.changed.md new file mode 100644 index 000000000..a495560ba --- /dev/null +++ b/changelog/3696.changed.md @@ -0,0 +1 @@ +- Added `text_aggregation_mode` parameter to `TTSService` and all TTS subclasses with a new `TextAggregationMode` enum (`SENTENCE`, `TOKEN`). All text now flows through text aggregators regardless of mode, enabling pattern detection and tag handling in TOKEN mode. diff --git a/changelog/3696.deprecated.md b/changelog/3696.deprecated.md new file mode 100644 index 000000000..7b371fc21 --- /dev/null +++ b/changelog/3696.deprecated.md @@ -0,0 +1 @@ +- ⚠️ Deprecated `aggregate_sentences` parameter on `TTSService` and all TTS subclasses. Use `text_aggregation_mode=TextAggregationMode.SENTENCE` or `text_aggregation_mode=TextAggregationMode.TOKEN` instead. 
diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index c5964506a..e47d2c811 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -24,6 +24,7 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.tts_service import TextAggregationMode from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -56,6 +57,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + text_aggregation_mode=TextAggregationMode.TOKEN, ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index d359bcfb1..55ae975d1 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -14,7 +14,6 @@ and LLM processing. 
import asyncio import time from dataclasses import dataclass, field -from enum import Enum from typing import ( TYPE_CHECKING, Any, @@ -36,6 +35,7 @@ from pipecat.audio.turn.base_turn_analyzer import BaseTurnParams from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.metrics.metrics import MetricsData from pipecat.transcriptions.language import Language +from pipecat.utils.text.base_text_aggregator import AggregationType from pipecat.utils.time import nanoseconds_to_str from pipecat.utils.utils import obj_count, obj_id @@ -393,16 +393,6 @@ class LLMTextFrame(TextFrame): self.includes_inter_frame_spaces = True -class AggregationType(str, Enum): - """Built-in aggregation strings.""" - - SENTENCE = "sentence" - WORD = "word" - - def __str__(self): - return self.value - - @dataclass class AggregatedTextFrame(TextFrame): """Text frame representing an aggregation of TextFrames. diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 334f80d80..4f1fd5a58 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -28,7 +28,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import AudioContextTTSService, TTSService +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -128,7 +128,8 @@ class AsyncAITTSService(AudioContextTTSService): encoding: str = "pcm_s16le", container: str = "raw", params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Async TTS service. 
@@ -144,13 +145,19 @@ class AsyncAITTSService(AudioContextTTSService): encoding: Audio encoding format. container: Audio container format. params: Additional input parameters for voice customization. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to the parent service. """ params = params or AsyncAITTSService.InputParams() super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index b3534b28e..f68694eb5 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -27,7 +27,7 @@ from pipecat.frames.frames import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.azure.common import language_to_azure_language from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import TTSService +from pipecat.services.tts_service import TextAggregationMode, TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -256,7 +256,8 @@ class AzureTTSService(TTSService, AzureBaseTTSService): voice: str = "en-US-SaraNeural", sample_rate: Optional[int] = None, params: Optional[AzureBaseTTSService.InputParams] = None, - aggregate_sentences: bool = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Azure streaming TTS service. 
@@ -267,13 +268,19 @@ class AzureTTSService(TTSService, AzureBaseTTSService): voice: Voice name to use for synthesis. Defaults to "en-US-SaraNeural". sample_rate: Audio sample rate in Hz. If None, uses service default. params: Voice and synthesis parameters configuration. - aggregate_sentences: Whether to aggregate sentences before synthesis. - **kwargs: Additional arguments passed to the parent TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. + **kwargs: Additional arguments passed to the parent TTSService. """ params = params or AzureBaseTTSService.InputParams() super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_text_frames=False, # We'll push text frames based on word timestamps push_stop_frames=True, pause_frame_processing=True, diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index edf838e59..0749af062 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -27,7 +27,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import AudioContextTTSService, TTSService +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator @@ -272,7 +272,8 @@ class CartesiaTTSService(AudioContextTTSService): container: str = "raw", params: Optional[InputParams] = None, text_aggregator: Optional[BaseTextAggregator] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode:
Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the Cartesia TTS service. @@ -292,13 +293,19 @@ class CartesiaTTSService(AudioContextTTSService): .. deprecated:: 0.0.95 Use an LLMTextProcessor before the TTSService for custom text aggregation. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. """ - # Aggregating sentences still gives cleaner-sounding results and fewer - # artifacts than streaming one word at a time. On average, waiting for a - # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama - # 3 model, and it's worth it for the better audio quality. + # By default, we aggregate sentences before sending to TTS. This adds ~200-300ms + # of latency per sentence (waiting for the sentence-ending punctuation token from + # the LLM). Setting text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly, which reduces latency. Streaming quality is good but less tested than + # sentence aggregation. # # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even @@ -308,6 +315,7 @@ class CartesiaTTSService(AudioContextTTSService): params = params or CartesiaTTSService.InputParams() super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, pause_frame_processing=True, @@ -337,7 +345,9 @@ class CartesiaTTSService(AudioContextTTSService): # The preferred way of taking advantage of Cartesia SSML Tags is # to use an LLMTextProcessor and/or a text_transformer to identify # and insert these tags for the purpose of the TTS service alone.
- self._text_aggregator = SkipTagsAggregator([("", "")]) + self._text_aggregator = SkipTagsAggregator( + [("", "")], aggregation_type=self._text_aggregation_mode + ) self._api_key = api_key self._cartesia_version = cartesia_version @@ -639,7 +649,10 @@ class CartesiaTTSService(AudioContextTTSService): Yields: Frame: Audio frames containing the synthesized speech. """ - logger.debug(f"{self}: Generating TTS [{text}]") + if not self._is_streaming_tokens: + logger.debug(f"{self}: Generating TTS [{text}]") + else: + logger.trace(f"{self}: Generating TTS [{text}]") try: if not self._websocket or self._websocket.state is State.CLOSED: @@ -654,7 +667,9 @@ class CartesiaTTSService(AudioContextTTSService): try: await self._get_websocket().send(msg) - await self.start_tts_usage_metrics(text) + # Usage metrics are aggregated at flush time when streaming tokens. + if not self._is_streaming_tokens: + await self.start_tts_usage_metrics(text) except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") yield TTSStoppedFrame(context_id=context_id) diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index c68d005f1..dcfdebc2f 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -47,6 +47,7 @@ from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( AudioContextTTSService, + TextAggregationMode, TTSService, ) from pipecat.transcriptions.language import Language, resolve_language @@ -365,7 +366,8 @@ class ElevenLabsTTSService(AudioContextTTSService): url: str = "wss://api.elevenlabs.io", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the ElevenLabs TTS 
service. @@ -377,13 +379,19 @@ class ElevenLabsTTSService(AudioContextTTSService): url: WebSocket URL for ElevenLabs TTS API. sample_rate: Audio sample rate. If None, uses default. params: Additional input parameters for voice customization. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. """ - # Aggregating sentences still gives cleaner-sounding results and fewer - # artifacts than streaming one word at a time. On average, waiting for a - # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama - # 3 model, and it's worth it for the better audio quality. + # By default, we aggregate sentences before sending to TTS. This adds ~200-300ms + # of latency per sentence (waiting for the sentence-ending punctuation token from + # the LLM). Setting text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly. To use this mode, you must set auto_mode=False. This eliminates + # aggregation time, but slows down ElevenLabs. # # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even @@ -397,6 +405,7 @@ class ElevenLabsTTSService(AudioContextTTSService): params = params or ElevenLabsTTSService.InputParams() super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, @@ -893,7 +902,8 @@ class ElevenLabsHttpTTSService(TTSService): base_url: str = "https://api.elevenlabs.io", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the ElevenLabs HTTP TTS service.
@@ -906,12 +916,18 @@ class ElevenLabsHttpTTSService(TTSService): base_url: Base URL for ElevenLabs HTTP API. sample_rate: Audio sample rate. If None, uses default. params: Additional input parameters for voice customization. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. """ params = params or ElevenLabsHttpTTSService.InputParams() super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 2fb86b4a6..d3f64c16f 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -51,7 +51,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextTTSService, TTSService +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -509,7 +509,8 @@ class InworldTTSService(AudioContextTTSService): sample_rate: Optional[int] = None, encoding: str = "LINEAR16", params: InputParams = None, - aggregate_sentences: bool = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, append_trailing_space: bool = True, **kwargs: Any, ): @@ -523,7 +524,12 @@ class InworldTTSService(AudioContextTTSService): sample_rate: Audio sample rate in Hz. encoding: Audio encoding format. params: Input parameters for Inworld WebSocket TTS configuration. - aggregate_sentences: Whether to aggregate sentences before synthesis. + aggregate_sentences: Deprecated. 
Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. append_trailing_space: Whether to append a trailing space to text before sending to TTS. **kwargs: Additional arguments passed to the parent class. """ @@ -536,6 +542,7 @@ class InworldTTSService(AudioContextTTSService): supports_word_timestamps=True, sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, append_trailing_space=append_trailing_space, settings=InworldTTSSettings( model=model, diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 81b366a8b..63411c3eb 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -36,7 +36,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import InterruptibleTTSService, TTSService +from pipecat.services.tts_service import InterruptibleTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -119,7 +119,8 @@ class NeuphonicTTSService(InterruptibleTTSService): sample_rate: Optional[int] = 22050, encoding: str = "pcm_linear", params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Neuphonic TTS service. @@ -131,13 +132,19 @@ class NeuphonicTTSService(InterruptibleTTSService): sample_rate: Audio sample rate in Hz. Defaults to 22050. encoding: Audio encoding format. Defaults to "pcm_linear". params: Additional input parameters for TTS configuration. 
- aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to parent InterruptibleTTSService. """ params = params or NeuphonicTTSService.InputParams() super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_stop_frames=True, stop_frame_timeout_s=2.0, sample_rate=sample_rate, diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 059db8178..d5f97e028 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -35,6 +35,7 @@ from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( AudioContextTTSService, InterruptibleTTSService, + TextAggregationMode, TTSService, ) from pipecat.transcriptions.language import Language, resolve_language @@ -181,7 +182,8 @@ class RimeTTSService(AudioContextTTSService): sample_rate: Optional[int] = None, params: Optional[InputParams] = None, text_aggregator: Optional[BaseTextAggregator] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize Rime TTS service. @@ -198,13 +200,19 @@ class RimeTTSService(AudioContextTTSService): .. deprecated:: 0.0.95 Use an LLMTextProcessor before the TTSService for custom text aggregation. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + text_aggregation_mode: How to aggregate incoming text before synthesis. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to parent class. 
""" # Initialize with parent class settings for proper frame handling params = params or RimeTTSService.InputParams() super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, @@ -243,7 +251,9 @@ class RimeTTSService(AudioContextTTSService): # The preferred way of taking advantage of Rime spelling is # to use an LLMTextProcessor and/or a text_transformer to identify # and insert these tags for the purpose of the TTS service alone. - self._text_aggregator = SkipTagsAggregator([("spell(", ")")]) + self._text_aggregator = SkipTagsAggregator( + [("spell(", ")")], aggregation_type=self._text_aggregation_mode + ) # Store service configuration self._api_key = api_key @@ -826,7 +836,8 @@ class RimeNonJsonTTSService(InterruptibleTTSService): audio_format: str = "pcm", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize Rime Non-JSON WebSocket TTS service. @@ -839,13 +850,21 @@ class RimeNonJsonTTSService(InterruptibleTTSService): audio_format: Audio format to use. sample_rate: Audio sample rate in Hz. params: Additional configuration parameters. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. Set to ``TextAggregationMode.SENTENCE`` + to aggregate text into sentences before synthesis, or + ``TextAggregationMode.TOKEN`` to stream tokens directly for lower latency. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to parent class. 
""" params = params or RimeNonJsonTTSService.InputParams() super().__init__( sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_stop_frames=True, pause_frame_processing=True, append_trailing_space=True, diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 7b63828a1..87604a9f9 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -63,7 +63,7 @@ from pipecat.frames.frames import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven -from pipecat.services.tts_service import InterruptibleTTSService, TTSService +from pipecat.services.tts_service import InterruptibleTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -785,7 +785,8 @@ class SarvamTTSService(InterruptibleTTSService): model: str = "bulbul:v2", voice_id: Optional[str] = None, url: str = "wss://api.sarvam.ai/text-to-speech/ws", - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, sample_rate: Optional[int] = None, params: Optional[InputParams] = None, **kwargs, @@ -799,7 +800,12 @@ class SarvamTTSService(InterruptibleTTSService): - "bulbul:v3-beta": Advanced model with temperature control voice_id: Speaker voice ID. If None, uses model-appropriate default. url: WebSocket URL for the TTS backend (default production URL). - aggregate_sentences: Merge multiple sentences into one audio chunk (default True). + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. 
sample_rate: Output audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses model-specific default. params: Optional input parameters to override defaults. @@ -834,6 +840,7 @@ class SarvamTTSService(InterruptibleTTSService): # Initialize parent class first super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_text_frames=True, pause_frame_processing=True, push_stop_frames=True, diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index e36d4754f..8c61f225d 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -11,6 +11,7 @@ import uuid import warnings from abc import abstractmethod from dataclasses import dataclass +from enum import Enum from typing import ( Any, AsyncGenerator, @@ -72,6 +73,23 @@ class TTSContext: append_to_context: bool = True +class TextAggregationMode(str, Enum): + """Controls how incoming text is aggregated before TTS synthesis. + + Parameters: + SENTENCE: Buffer text until sentence boundaries are detected before synthesis. + Produces more natural speech but adds latency (~200-300ms per sentence). + TOKEN: Stream text tokens directly to TTS as they arrive. + Reduces latency but may affect speech quality depending on the TTS provider. + """ + + SENTENCE = "sentence" + TOKEN = "token" + + def __str__(self): + return self.value + + class TTSService(AIService): """Base class for text-to-speech services. @@ -109,7 +127,8 @@ class TTSService(AIService): def __init__( self, *, - aggregate_sentences: bool = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, # if True, TTSService will push TextFrames and LLMFullResponseEndFrames, # otherwise subclass must do it push_text_frames: bool = True, @@ -153,7 +172,16 @@ class TTSService(AIService): """Initialize the TTS service. Args: + text_aggregation_mode: How to aggregate incoming text before synthesis. 
+ TextAggregationMode.SENTENCE (default) buffers until sentence boundaries, + TextAggregationMode.TOKEN streams tokens directly for lower latency. aggregate_sentences: Whether to aggregate text into sentences before synthesis. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. Set to ``TextAggregationMode.SENTENCE`` + to aggregate text into sentences before synthesis, or + ``TextAggregationMode.TOKEN`` to stream tokens directly for lower latency. + push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames. push_stop_frames: Whether to automatically push TTSStoppedFrames. stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True. @@ -194,7 +222,33 @@ class TTSService(AIService): or TTSSettings(), **kwargs, ) - self._aggregate_sentences: bool = aggregate_sentences + + # Resolve text_aggregation_mode from the new param or deprecated aggregate_sentences + if aggregate_sentences is not None: + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Parameter 'aggregate_sentences' is deprecated. 
" + "Use 'text_aggregation_mode=TextAggregationMode.SENTENCE' or " + "'text_aggregation_mode=TextAggregationMode.TOKEN' instead.", + DeprecationWarning, + stacklevel=2, + ) + if text_aggregation_mode is None: + text_aggregation_mode = ( + TextAggregationMode.SENTENCE + if aggregate_sentences + else TextAggregationMode.TOKEN + ) + + if text_aggregation_mode is None: + text_aggregation_mode = TextAggregationMode.SENTENCE + + self._text_aggregation_mode: TextAggregationMode = text_aggregation_mode + # Keep for backward compat with subclasses that read self._aggregate_sentences + self._aggregate_sentences: bool = text_aggregation_mode != TextAggregationMode.TOKEN self._push_text_frames: bool = push_text_frames self._push_stop_frames: bool = push_stop_frames self._stop_frame_timeout_s: float = stop_frame_timeout_s @@ -204,7 +258,9 @@ class TTSService(AIService): self._append_trailing_space: bool = append_trailing_space self._init_sample_rate = sample_rate self._sample_rate = 0 - self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() + self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator( + aggregation_type=self._text_aggregation_mode + ) if text_aggregator: import warnings @@ -240,6 +296,8 @@ class TTSService(AIService): self._processing_text: bool = False self._tts_contexts: Dict[str, TTSContext] = {} + self._streaming_text_log: str = "" + self._aggregation_logged: bool = False # Word timestamp state (active when supports_word_timestamps=True) self._supports_word_timestamps: bool = supports_word_timestamps @@ -253,6 +311,11 @@ class TTSService(AIService): self._register_event_handler("on_connection_error") self._register_event_handler("on_tts_request") + @property + def _is_streaming_tokens(self) -> bool: + """Whether the service is streaming tokens directly without sentence aggregation.""" + return self._text_aggregation_mode == TextAggregationMode.TOKEN + @property def sample_rate(self) -> int: """Get the 
current sample rate for audio output. @@ -511,6 +574,9 @@ class TTSService(AIService): and not isinstance(frame, InterimTranscriptionFrame) and not isinstance(frame, TranscriptionFrame) ): + if not self._is_streaming_tokens and not self._aggregation_logged: + await self.start_text_aggregation_metrics() + self._aggregation_logged = True await self._process_text_frame(frame) elif isinstance(frame, InterruptionFrame): await self._handle_interruption(frame, direction) @@ -527,8 +593,18 @@ class TTSService(AIService): # Flush any remaining text (including text waiting for lookahead) remaining = await self._text_aggregator.flush() if remaining: + # If this is the first (and only) sentence, stop the aggregation metric. + await self.stop_text_aggregation_metrics() await self._push_tts_frames(AggregatedTextFrame(remaining.text, remaining.type)) + self._aggregation_logged = False + + # Log accumulated streamed text and emit aggregated usage metric. + if self._streaming_text_log: + logger.debug(f"{self}: Generating TTS [{self._streaming_text_log}]") + await self.start_tts_usage_metrics(self._streaming_text_log) + self._streaming_text_log = "" + # Reset aggregator state self._processing_text = False if isinstance(frame, LLMFullResponseEndFrame): @@ -690,26 +766,18 @@ class TTSService(AIService): await self.resume_processing_frames() async def _process_text_frame(self, frame: TextFrame): - text: Optional[str] = None - includes_inter_frame_spaces: bool = False - if not self._aggregate_sentences: - text = frame.text - includes_inter_frame_spaces = frame.includes_inter_frame_spaces - aggregated_by = "token" - - if text: - logger.trace(f"Pushing TTS frames for text: {text}, {aggregated_by}") - await self._push_tts_frames( - AggregatedTextFrame(text, aggregated_by), includes_inter_frame_spaces - ) - else: - async for aggregate in self._text_aggregator.aggregate(frame.text): - text = aggregate.text - aggregated_by = aggregate.type - logger.trace(f"Pushing TTS frames for text: {text}, 
{aggregated_by}") - await self._push_tts_frames( - AggregatedTextFrame(text, aggregated_by), includes_inter_frame_spaces - ) + async for aggregate in self._text_aggregator.aggregate(frame.text): + includes_inter_frame_spaces = ( + frame.includes_inter_frame_spaces + if aggregate.type == AggregationType.TOKEN + else False + ) + if aggregate.type != AggregationType.TOKEN: + # Stop the aggregation metric on the first sentence only. + await self.stop_text_aggregation_metrics() + await self._push_tts_frames( + AggregatedTextFrame(aggregate.text, aggregate.type), includes_inter_frame_spaces + ) async def _push_tts_frames( self, @@ -739,7 +807,15 @@ class TTSService(AIService): # or when we received an LLMFullResponseEndFrame self._processing_text = True - await self.start_processing_metrics() + # Accumulate text for a single debug log at flush time when streaming tokens. + if self._is_streaming_tokens: + self._streaming_text_log += text + + # Skip per-token processing metrics when streaming. The per-token + # processing time is just websocket send overhead (~0.1ms) and not + # meaningful. TTFB captures the important timing for streaming TTS. + if not self._is_streaming_tokens: + await self.start_processing_metrics() # Process all filters. 
for filter in self._text_filters: @@ -747,7 +823,8 @@ class TTSService(AIService): text = await filter.filter(text) if not text.strip(): - await self.stop_processing_metrics() + if not self._is_streaming_tokens: + await self.stop_processing_metrics() return # Create context ID and store metadata @@ -785,7 +862,8 @@ class TTSService(AIService): await self.process_generator(self.run_tts(prepared_text, context_id)) - await self.stop_processing_metrics() + if not self._is_streaming_tokens: + await self.stop_processing_metrics() if self._push_text_frames: # In TTS services that support word timestamps, the TTSTextFrames diff --git a/src/pipecat/utils/text/base_text_aggregator.py b/src/pipecat/utils/text/base_text_aggregator.py index 13691d9cd..2b050fcb7 100644 --- a/src/pipecat/utils/text/base_text_aggregator.py +++ b/src/pipecat/utils/text/base_text_aggregator.py @@ -21,6 +21,7 @@ class AggregationType(str, Enum): """Built-in aggregation strings.""" SENTENCE = "sentence" + TOKEN = "token" WORD = "word" def __str__(self): @@ -66,6 +67,25 @@ class BaseTextAggregator(ABC): logic, text manipulation behavior, and state management for interruptions. """ + def __init__(self, *, aggregation_type: AggregationType = AggregationType.SENTENCE): + """Initialize the base text aggregator. + + Args: + aggregation_type: The aggregation strategy to use. SENTENCE buffers + text until sentence boundaries are detected, TOKEN passes text + through immediately, and WORD buffers until word boundaries. + """ + self._aggregation_type = AggregationType(aggregation_type) + + @property + def aggregation_type(self) -> AggregationType: + """Get the aggregation type for this aggregator. + + Returns: + The aggregation type. 
+ """ + return self._aggregation_type + @property @abstractmethod def text(self) -> Aggregation: diff --git a/src/pipecat/utils/text/pattern_pair_aggregator.py b/src/pipecat/utils/text/pattern_pair_aggregator.py index bfaf9291b..835bb8591 100644 --- a/src/pipecat/utils/text/pattern_pair_aggregator.py +++ b/src/pipecat/utils/text/pattern_pair_aggregator.py @@ -96,8 +96,11 @@ class PatternPairAggregator(SimpleTextAggregator): Creates an empty aggregator with no patterns or handlers registered. Text buffering and pattern detection will begin when text is aggregated. + + Args: + **kwargs: Additional arguments passed to SimpleTextAggregator (e.g. aggregation_type). """ - super().__init__() + super().__init__(**kwargs) self._patterns = {} self._handlers = {} self._last_processed_position = 0 # Track where we last checked for complete patterns @@ -146,7 +149,7 @@ class PatternPairAggregator(SimpleTextAggregator): Returns: Self for method chaining. """ - if type in [AggregationType.SENTENCE, AggregationType.WORD]: + if type in [AggregationType.SENTENCE, AggregationType.WORD, AggregationType.TOKEN]: raise ValueError( f"The aggregation type '{type}' is reserved for default behavior and can not be used for custom patterns." ) @@ -321,6 +324,9 @@ class PatternPairAggregator(SimpleTextAggregator): and uses the parent's lookahead logic for sentence detection when no patterns are active. + In TOKEN mode, pattern detection still works but non-pattern text is + yielded as TOKEN aggregations instead of waiting for sentence boundaries. + Args: text: Text to aggregate. @@ -370,18 +376,35 @@ class PatternPairAggregator(SimpleTextAggregator): # boundaries when a pattern begins (e.g., "Here is code ..." 
yields "Here is code") result = self._text[: pattern_start[0]] self._text = self._text[pattern_start[0] :] - yield PatternMatch( - content=result.strip(), type=AggregationType.SENTENCE, full_match=result + agg_type = ( + AggregationType.TOKEN + if self._aggregation_type == AggregationType.TOKEN + else AggregationType.SENTENCE ) + yield PatternMatch(content=result.strip(), type=agg_type, full_match=result) continue - # Use parent's lookahead logic for sentence detection - aggregation = await super()._check_sentence_with_lookahead(char) - if aggregation: - # Convert to PatternMatch for consistency with return type + if self._aggregation_type != AggregationType.TOKEN: + # Use parent's lookahead logic for sentence detection + aggregation = await super()._check_sentence_with_lookahead(char) + if aggregation: + # Convert to PatternMatch for consistency with return type + yield PatternMatch( + content=aggregation.text, + type=aggregation.type, + full_match=aggregation.text, + ) + + # In TOKEN mode, yield any accumulated text after processing all chars, + # but only if there's no incomplete pattern being buffered. + if self._aggregation_type == AggregationType.TOKEN and self._text: + if self._match_start_of_pattern(self._text) is None: yield PatternMatch( - content=aggregation.text, type=aggregation.type, full_match=aggregation.text + content=self._text, + type=AggregationType.TOKEN, + full_match=self._text, ) + self._text = "" async def handle_interruption(self): """Handle interruptions by clearing the buffer and pattern state. diff --git a/src/pipecat/utils/text/simple_text_aggregator.py b/src/pipecat/utils/text/simple_text_aggregator.py index b0cc698a9..b5b179fcf 100644 --- a/src/pipecat/utils/text/simple_text_aggregator.py +++ b/src/pipecat/utils/text/simple_text_aggregator.py @@ -25,11 +25,15 @@ class SimpleTextAggregator(BaseTextAggregator): most straightforward implementation of text aggregation for TTS processing. 
""" - def __init__(self): + def __init__(self, **kwargs): """Initialize the simple text aggregator. Creates an empty text buffer ready to begin accumulating text tokens. + + Args: + **kwargs: Additional arguments passed to BaseTextAggregator (e.g. aggregation_type). """ + super().__init__(**kwargs) self._text = "" self._needs_lookahead: bool = False @@ -43,19 +47,25 @@ class SimpleTextAggregator(BaseTextAggregator): return Aggregation(text=self._text.strip(" "), type=AggregationType.SENTENCE) async def aggregate(self, text: str) -> AsyncIterator[Aggregation]: - """Aggregate text and yield completed sentences. + """Aggregate text and yield completed aggregations. - Processes the input text character-by-character. When sentence-ending - punctuation is detected, it waits for non-whitespace lookahead before - calling NLTK. This prevents false positives like "$29." being detected - as a sentence when it's actually "$29.95". + In SENTENCE mode, processes the input text character-by-character. When + sentence-ending punctuation is detected, it waits for non-whitespace + lookahead before calling NLTK. + + In TOKEN mode, yields the text immediately without buffering. Args: text: Text to aggregate. Yields: - Complete sentences as Aggregation objects. + Aggregation objects (sentences in SENTENCE mode, tokens in TOKEN mode). """ + if self._aggregation_type == AggregationType.TOKEN: + if text: + yield Aggregation(text=text, type=AggregationType.TOKEN) + return + # Process text character by character for char in text: self._text += char @@ -114,11 +124,15 @@ class SimpleTextAggregator(BaseTextAggregator): """Flush any remaining text in the buffer. Returns any text remaining in the buffer. This is called at the end - of a stream to ensure all text is processed. + of a stream to ensure all text is processed. In TOKEN mode, returns + None since tokens are yielded immediately. Returns: - Any remaining text as a sentence, or None if buffer is empty. 
+ Any remaining text as a sentence, or None if buffer is empty or in TOKEN mode. """ + if self._aggregation_type == AggregationType.TOKEN: + return None + if self._text: # Return whatever we have in the buffer result = self._text diff --git a/src/pipecat/utils/text/skip_tags_aggregator.py b/src/pipecat/utils/text/skip_tags_aggregator.py index 4232efd7d..1b6a7f156 100644 --- a/src/pipecat/utils/text/skip_tags_aggregator.py +++ b/src/pipecat/utils/text/skip_tags_aggregator.py @@ -14,7 +14,7 @@ as a unit regardless of internal punctuation. from typing import AsyncIterator, Optional, Sequence from pipecat.utils.string import StartEndTags, parse_start_end_tags -from pipecat.utils.text.base_text_aggregator import Aggregation +from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator @@ -31,14 +31,15 @@ class SkipTagsAggregator(SimpleTextAggregator): identified and that content within tags is never split at sentence boundaries. """ - def __init__(self, tags: Sequence[StartEndTags]): + def __init__(self, tags: Sequence[StartEndTags], **kwargs): """Initialize the skip tags aggregator. Args: tags: Sequence of StartEndTags objects defining the tag pairs that should prevent sentence boundary detection. + **kwargs: Additional arguments passed to SimpleTextAggregator (e.g. aggregation_type). """ - super().__init__() + super().__init__(**kwargs) self._tags = tags self._current_tag: Optional[StartEndTags] = None self._current_tag_index: int = 0 @@ -50,13 +51,33 @@ class SkipTagsAggregator(SimpleTextAggregator): uses the parent's lookahead logic for sentence detection when not inside tags. + In TOKEN mode, text is passed through immediately unless we're inside + a tag, in which case we buffer until the closing tag is found. + Args: text: Text to aggregate. Yields: Aggregation objects containing text up to a sentence boundary, - marked as SENTENCE type. 
+ marked as SENTENCE type (or TOKEN type in TOKEN mode). """ + if self._aggregation_type == AggregationType.TOKEN: + # In TOKEN mode, process chars for tag tracking but yield the + # full input as a single token when not inside a tag. + for char in text: + self._text += char + + # Update tag state + (self._current_tag, self._current_tag_index) = parse_start_end_tags( + self._text, self._tags, self._current_tag, self._current_tag_index + ) + + # After processing all chars: if not inside a tag, yield accumulated text + if not self._current_tag and self._text: + yield Aggregation(text=self._text, type=AggregationType.TOKEN) + self._text = "" + return + # Process text character by character for char in text: self._text += char diff --git a/tests/test_pattern_pair_aggregator.py b/tests/test_pattern_pair_aggregator.py index bcc8d18f7..6c9e23552 100644 --- a/tests/test_pattern_pair_aggregator.py +++ b/tests/test_pattern_pair_aggregator.py @@ -194,5 +194,66 @@ class TestPatternPairAggregator(unittest.IsolatedAsyncioTestCase): self.assertEqual(self.aggregator.text.text, "") +class TestPatternPairAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = PatternPairAggregator(aggregation_type=AggregationType.TOKEN) + self.handler = AsyncMock() + self.aggregator.add_pattern( + type="think", + start_pattern="", + end_pattern="", + action=MatchAction.REMOVE, + ) + self.aggregator.on_pattern_match("think", self.handler) + + async def test_token_no_patterns(self): + """Non-pattern text passes through as TOKEN, one per aggregate call.""" + results = [] + for token in ["Hello", " world", "."]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + self.assertEqual(len(results), 3) + self.assertEqual(results[0].text, "Hello") + self.assertEqual(results[1].text, " world") + self.assertEqual(results[2].text, ".") + for r in results: + self.assertEqual(r.type, 
"token") + + async def test_token_pattern_detection(self): + """Pattern detection still works with word-by-word token delivery.""" + results = [] + for token in ["Hi ", "", "secret", "", " bye"]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + # Handler called once when the pattern completes + self.handler.assert_called_once() + call_args = self.handler.call_args[0][0] + self.assertEqual(call_args.text, "secret") + + # "Hi " yields before pattern starts, pattern is removed, " bye" yields after + self.assertEqual(len(results), 2) + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + self.assertEqual(results[1].text, " bye") + self.assertEqual(results[1].type, "token") + + async def test_token_incomplete_pattern_buffers(self): + """Incomplete pattern is buffered across calls, not leaked to output.""" + results = [] + for token in ["Hi ", "", "partial"]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + # Only "Hi " should be yielded; "partial" stays buffered + self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + self.handler.assert_not_called() + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_simple_text_aggregator.py b/tests/test_simple_text_aggregator.py index 4b3613e27..46c77df42 100644 --- a/tests/test_simple_text_aggregator.py +++ b/tests/test_simple_text_aggregator.py @@ -181,5 +181,39 @@ class TestSimpleTextAggregator(unittest.IsolatedAsyncioTestCase): assert result.text == "こんにちは。" +class TestSimpleTextAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = SimpleTextAggregator(aggregation_type=AggregationType.TOKEN) + + async def test_token_passthrough(self): + """TOKEN mode yields text immediately without buffering.""" + results = [agg async for agg in 
self.aggregator.aggregate("Hello")] + assert len(results) == 1 + assert results[0].text == "Hello" + assert results[0].type == "token" + + async def test_token_multiple_calls(self): + """Each aggregate call yields its text independently.""" + r1 = [agg async for agg in self.aggregator.aggregate("Hello ")] + r2 = [agg async for agg in self.aggregator.aggregate("world.")] + assert len(r1) == 1 + assert r1[0].text == "Hello " + assert len(r2) == 1 + assert r2[0].text == "world." + + async def test_token_empty_text(self): + """Empty text yields nothing.""" + results = [agg async for agg in self.aggregator.aggregate("")] + assert len(results) == 0 + + async def test_token_flush_returns_none(self): + """Flush returns None in TOKEN mode since nothing is buffered.""" + await self.aggregator.aggregate("Hello").__anext__() + result = await self.aggregator.flush() + assert result is None + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_skip_tags_aggregator.py b/tests/test_skip_tags_aggregator.py index c7fea22c3..882b26e82 100644 --- a/tests/test_skip_tags_aggregator.py +++ b/tests/test_skip_tags_aggregator.py @@ -64,5 +64,60 @@ class TestSkipTagsAggregator(unittest.IsolatedAsyncioTestCase): self.assertEqual(self.aggregator.text.type, "sentence") +class TestSkipTagsAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = SkipTagsAggregator( + [("", "")], aggregation_type=AggregationType.TOKEN + ) + + async def test_token_no_tags(self): + """No tags: text passes through immediately as TOKEN.""" + results = [agg async for agg in self.aggregator.aggregate("Hello!")] + self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "Hello!") + self.assertEqual(results[0].type, "token") + + async def test_token_inside_tag_buffers(self): + """Inside a tag, text is buffered until the closing tag is found.""" + results = [agg async for agg in 
self.aggregator.aggregate("foo@bar")] + # Still inside tag, nothing yielded + self.assertEqual(len(results), 0) + + # Close the tag + results = [agg async for agg in self.aggregator.aggregate("")] + self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "foo@bar") + self.assertEqual(results[0].type, "token") + + async def test_token_flush_unclosed_tag(self): + """Flush with unclosed tag returns remaining text.""" + async for _ in self.aggregator.aggregate("unclosed"): + pass + result = await self.aggregator.flush() + # TOKEN mode flush returns None (parent behavior) + self.assertIsNone(result) + + async def test_token_text_around_tags(self): + """Simulate word-by-word token delivery with tags.""" + results = [] + # Simulate LLM streaming tokens one at a time + for token in ["Hi ", "", "X", "", " bye"]: + async for agg in self.aggregator.aggregate(token): + results.append(agg) + + self.assertEqual(len(results), 3) + # Text before tag passes through immediately + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + # Tagged content is buffered until the closing tag, then yielded whole + self.assertEqual(results[1].text, "X") + self.assertEqual(results[1].type, "token") + # Text after tag passes through immediately + self.assertEqual(results[2].text, " bye") + self.assertEqual(results[2].type, "token") + + if __name__ == "__main__": unittest.main() From 3c20eda8bf95a8590d907ba72603a2c319fad473 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 26 Feb 2026 09:32:52 -0500 Subject: [PATCH 117/189] Keep model/language in LiveOptions at construction time so apply_update's bidirectional sync is sufficient; simplify _build_live_options to only add sample_rate --- src/pipecat/services/deepgram/stt.py | 11 +++-------- src/pipecat/services/deepgram/stt_sagemaker.py | 11 +++-------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 
f1c2fd19c..b193f899d 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -262,10 +262,9 @@ class DeepgramSTTService(STTService): if "language" in merged_dict and isinstance(merged_dict["language"], Language): merged_dict["language"] = merged_dict["language"].value - # Extract model/language for top-level STTSettings fields; everything - # else lives inside LiveOptions. - model = merged_dict.pop("model", None) - language = merged_dict.pop("language", None) + # Sync model/language to top-level STTSettings fields + model = merged_dict.get("model") + language = merged_dict.get("language") settings = DeepgramSTTSettings( model=model, language=language, live_options=LiveOptions(**merged_dict) @@ -380,10 +379,6 @@ class DeepgramSTTService(STTService): A fully-populated ``LiveOptions`` ready for the Deepgram SDK. """ opts: dict[str, Any] = self._settings.live_options.to_dict() - - # Overlay model/language from top-level settings and sample_rate from service. - opts["model"] = self._settings.model - opts["language"] = self._settings.language opts["sample_rate"] = self.sample_rate return LiveOptions(**opts) diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 12357a8cd..6f91906b7 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -247,10 +247,9 @@ class DeepgramSageMakerSTTService(STTService): if "language" in merged_dict and isinstance(merged_dict["language"], Language): merged_dict["language"] = merged_dict["language"].value - # Extract model/language for top-level STTSettings fields; everything - # else lives inside LiveOptions. 
- model = merged_dict.pop("model", None) - language = merged_dict.pop("language", None) + # Sync model/language to top-level STTSettings fields + model = merged_dict.get("model") + language = merged_dict.get("language") settings = DeepgramSageMakerSTTSettings( model=model, language=language, live_options=LiveOptions(**merged_dict) @@ -344,10 +343,6 @@ class DeepgramSageMakerSTTService(STTService): A fully-populated ``LiveOptions`` ready for the Deepgram SDK. """ opts: dict[str, Any] = self._settings.live_options.to_dict() - - # Overlay model/language from top-level settings and sample_rate from service. - opts["model"] = self._settings.model - opts["language"] = self._settings.language opts["sample_rate"] = self.sample_rate return LiveOptions(**opts) From c184ac09b8323571a4269182e3f9af9fe6d70c1b Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 26 Feb 2026 09:42:15 -0500 Subject: [PATCH 118/189] Inline `_build_live_options` into `_connect` in `DeepgramSTTService` and `DeepgramSageMakerSTTService` since it's trivial and only called from one place --- src/pipecat/services/deepgram/stt.py | 19 +++++-------------- .../services/deepgram/stt_sagemaker.py | 17 +++++------------ 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index b193f899d..59107d0f8 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -372,17 +372,6 @@ class DeepgramSTTService(STTService): await self._connection.send(audio) yield None - def _build_live_options(self) -> LiveOptions: - """Build a ``LiveOptions`` from stored settings and sample rate. - - Returns: - A fully-populated ``LiveOptions`` ready for the Deepgram SDK. 
- """ - opts: dict[str, Any] = self._settings.live_options.to_dict() - opts["sample_rate"] = self.sample_rate - - return LiveOptions(**opts) - async def _connect(self): logger.debug("Connecting to Deepgram") @@ -403,9 +392,11 @@ class DeepgramSTTService(STTService): self._on_utterance_end, ) - if not await self._connection.start( - options=self._build_live_options(), addons=self._addons - ): + live_options = LiveOptions( + **{**self._settings.live_options.to_dict(), "sample_rate": self.sample_rate} + ) + + if not await self._connection.start(options=live_options, addons=self._addons): await self.push_error(error_msg=f"Unable to connect to Deepgram") else: headers = { diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 6f91906b7..ee2121bec 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -336,17 +336,6 @@ class DeepgramSageMakerSTTService(STTService): yield ErrorFrame(error=f"Unknown error occurred: {e}") yield None - def _build_live_options(self) -> LiveOptions: - """Build a ``LiveOptions`` from stored settings and sample rate. - - Returns: - A fully-populated ``LiveOptions`` ready for the Deepgram SDK. - """ - opts: dict[str, Any] = self._settings.live_options.to_dict() - opts["sample_rate"] = self.sample_rate - - return LiveOptions(**opts) - async def _connect(self): """Connect to the SageMaker endpoint and start the BiDi session. 
@@ -356,9 +345,13 @@ class DeepgramSageMakerSTTService(STTService): """ logger.debug("Connecting to Deepgram on SageMaker...") + live_options = LiveOptions( + **{**self._settings.live_options.to_dict(), "sample_rate": self.sample_rate} + ) + # Build query string from live_options, converting booleans to strings query_params = {} - for key, value in self._build_live_options().to_dict().items(): + for key, value in live_options.to_dict().items(): if value is not None: # Convert boolean values to lowercase strings for Deepgram API if isinstance(value, bool): From 3ae173520ee739c6a3e3948b7a16fedbab1d11be Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 08:59:38 -0500 Subject: [PATCH 119/189] Code review feedback --- examples/foundational/07-interruptible.py | 4 +- src/pipecat/processors/frame_processor.py | 1 + .../metrics/frame_processor_metrics.py | 6 +- src/pipecat/services/cartesia/tts.py | 12 ++-- src/pipecat/services/elevenlabs/tts.py | 7 ++- src/pipecat/services/tts_service.py | 57 +++++++++++++------ 6 files changed, 57 insertions(+), 30 deletions(-) diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index e47d2c811..074e091ea 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -57,7 +57,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - text_aggregation_mode=TextAggregationMode.TOKEN, + # Alternatively, you can use TextAggregationMode.TOKEN to stream tokens instead of + # sentences for faster response times. 
+ # text_aggregation_mode=TextAggregationMode.TOKEN, ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 37e8dc10d..baa52cc70 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -501,6 +501,7 @@ class FrameProcessor(BaseObject): """Stop all active metrics collection.""" await self.stop_ttfb_metrics() await self.stop_processing_metrics() + await self.stop_text_aggregation_metrics() def create_task(self, coroutine: Coroutine, name: Optional[str] = None) -> asyncio.Task: """Create a new task managed by this processor. diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index cb5bc8a42..7a52895a2 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -44,6 +44,7 @@ class FrameProcessorMetrics(BaseObject): self._task_manager = None self._start_ttfb_time = 0 self._start_processing_time = 0 + self._start_text_aggregation_time = 0 self._last_ttfb_time = 0 self._should_report_ttfb = True @@ -223,10 +224,7 @@ class FrameProcessorMetrics(BaseObject): Returns: MetricsFrame containing text aggregation time, or None if not measuring. """ - if ( - not hasattr(self, "_start_text_aggregation_time") - or self._start_text_aggregation_time == 0 - ): + if self._start_text_aggregation_time == 0: return None value = time.time() - self._start_text_aggregation_time diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 0749af062..2e637c339 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -303,9 +303,11 @@ class CartesiaTTSService(AudioContextTTSService): """ # By default, we aggregate sentences before sending to TTS. 
This adds # ~200-300ms of latency per sentence (waiting for the sentence-ending - # punctuation token from the LLM). Setting aggregate_sentences=False - # streams tokens directly, which reduces latency. Streaming quality - # is good but less tested than sentence aggregation. + # punctuation token from the LLM). Setting + # text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly, which reduces latency. Streaming quality is good but less + # tested than sentence aggregation. + # TODO: Consider making TOKEN the default for Cartesia in 1.0. # # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even @@ -667,9 +669,7 @@ class CartesiaTTSService(AudioContextTTSService): try: await self._get_websocket().send(msg) - # Usage metrics are aggregated at flush time when streaming tokens. - if not self._is_streaming_tokens: - await self.start_tts_usage_metrics(text) + await self.start_tts_usage_metrics(text) except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") yield TTSStoppedFrame(context_id=context_id) diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index dcfdebc2f..1811ed971 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -389,9 +389,10 @@ class ElevenLabsTTSService(AudioContextTTSService): """ # By default, we aggregate sentences before sending to TTS. This adds # ~200-300ms of latency per sentence (waiting for the sentence-ending - # punctuation token from the LLM). Setting aggregate_sentences=False - # streams tokens directly. To use this mode, you must set auto_mode=False. - # This eliminates aggregation time, but slows down ElevenLabs. + # punctuation token from the LLM). Setting + # text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly. To use this mode, you must set auto_mode=False. 
This + # eliminates aggregation time, but slows down ElevenLabs. # # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 8c61f225d..c6d2672d6 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -247,8 +247,6 @@ class TTSService(AIService): text_aggregation_mode = TextAggregationMode.SENTENCE self._text_aggregation_mode: TextAggregationMode = text_aggregation_mode - # Keep for backward compat with subclasses that read self._aggregate_sentences - self._aggregate_sentences: bool = text_aggregation_mode != TextAggregationMode.TOKEN self._push_text_frames: bool = push_text_frames self._push_stop_frames: bool = push_stop_frames self._stop_frame_timeout_s: float = stop_frame_timeout_s @@ -296,8 +294,8 @@ class TTSService(AIService): self._processing_text: bool = False self._tts_contexts: Dict[str, TTSContext] = {} - self._streaming_text_log: str = "" - self._aggregation_logged: bool = False + self._streamed_text: str = "" + self._text_aggregation_metrics_started: bool = False # Word timestamp state (active when supports_word_timestamps=True) self._supports_word_timestamps: bool = supports_word_timestamps @@ -316,6 +314,35 @@ class TTSService(AIService): """Whether the service is streaming tokens directly without sentence aggregation.""" return self._text_aggregation_mode == TextAggregationMode.TOKEN + async def start_tts_usage_metrics(self, text: str): + """Record TTS usage metrics. + + When streaming tokens, usage metrics are aggregated and reported at + flush time instead of per token, so individual calls are skipped. + + Args: + text: The text being processed by TTS. 
+ """ + if self._is_streaming_tokens: + return + await super().start_tts_usage_metrics(text) + + async def start_text_aggregation_metrics(self): + """Start text aggregation metrics if not already started. + + Only starts the metric once per LLM response. Skipped when streaming + tokens since per-token aggregation time is not meaningful. + """ + if self._is_streaming_tokens or self._text_aggregation_metrics_started: + return + self._text_aggregation_metrics_started = True + await super().start_text_aggregation_metrics() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation metrics and reset the started flag.""" + self._text_aggregation_metrics_started = False + await super().stop_text_aggregation_metrics() + @property def sample_rate(self) -> int: """Get the current sample rate for audio output. @@ -574,9 +601,7 @@ class TTSService(AIService): and not isinstance(frame, InterimTranscriptionFrame) and not isinstance(frame, TranscriptionFrame) ): - if not self._is_streaming_tokens and not self._aggregation_logged: - await self.start_text_aggregation_metrics() - self._aggregation_logged = True + await self.start_text_aggregation_metrics() await self._process_text_frame(frame) elif isinstance(frame, InterruptionFrame): await self._handle_interruption(frame, direction) @@ -592,18 +617,16 @@ class TTSService(AIService): # Flush any remaining text (including text waiting for lookahead) remaining = await self._text_aggregator.flush() + # Stop the aggregation metric (no-op if already stopped on first sentence). + await self.stop_text_aggregation_metrics() if remaining: - # If this is the first (and only) sentence, stop the aggregation metric. - await self.stop_text_aggregation_metrics() await self._push_tts_frames(AggregatedTextFrame(remaining.text, remaining.type)) - self._aggregation_logged = False - # Log accumulated streamed text and emit aggregated usage metric. 
- if self._streaming_text_log: - logger.debug(f"{self}: Generating TTS [{self._streaming_text_log}]") - await self.start_tts_usage_metrics(self._streaming_text_log) - self._streaming_text_log = "" + if self._streamed_text: + logger.debug(f"{self}: Generating TTS [{self._streamed_text}]") + await super().start_tts_usage_metrics(self._streamed_text) + self._streamed_text = "" # Reset aggregator state self._processing_text = False @@ -754,6 +777,8 @@ class TTSService(AIService): await filter.handle_interruption() self._llm_response_started = False + self._streamed_text = "" + self._text_aggregation_metrics_started = False if self._supports_word_timestamps: await self.reset_word_timestamps() @@ -809,7 +834,7 @@ class TTSService(AIService): # Accumulate text for a single debug log at flush time when streaming tokens. if self._is_streaming_tokens: - self._streaming_text_log += text + self._streamed_text += text # Skip per-token processing metrics when streaming. The per-token # processing time is just websocket send overhead (~0.1ms) and not From 907ff58d41ce77da6b6b9270fc2969f1d6106edb Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 23 Feb 2026 17:11:08 -0500 Subject: [PATCH 120/189] Align Ultravox Realtime service with OpenAI/Gemini patterns - Add InterruptionFrame handling with stop_all_metrics() - Add processing metrics (start/stop) at response boundaries - Fix agent transcript handling for voice and text modalities: - Voice mode: push LLMTextFrame (append_to_context=False) and TTSTextFrame for deltas, skip duplicated final text - Text mode: push LLMTextFrame with proper response lifecycle, no TTSTextFrame (downstream TTS handles audio) - Add output_medium parameter to AgentInputParams and OneShotInputParams - Improve TTFB measurement using VAD speech end time - Update example with user turn strategies and transcript events - Add text-only output example (50a-ultravox-realtime-text.py) --- changelog/3806.added.md | 1 + changelog/3806.changed.2.md | 1 + 
changelog/3806.changed.md | 1 + examples/foundational/50-ultravox-realtime.py | 42 ++- .../50a-ultravox-realtime-text.py | 263 ++++++++++++++++++ src/pipecat/services/ultravox/llm.py | 102 +++++-- 6 files changed, 384 insertions(+), 26 deletions(-) create mode 100644 changelog/3806.added.md create mode 100644 changelog/3806.changed.2.md create mode 100644 changelog/3806.changed.md create mode 100644 examples/foundational/50a-ultravox-realtime-text.py diff --git a/changelog/3806.added.md b/changelog/3806.added.md new file mode 100644 index 000000000..eeddc9825 --- /dev/null +++ b/changelog/3806.added.md @@ -0,0 +1 @@ +- Added `output_medium` parameter to `AgentInputParams` and `OneShotInputParams` in Ultravox service to control initial output medium (text or voice) at call creation time. diff --git a/changelog/3806.changed.2.md b/changelog/3806.changed.2.md new file mode 100644 index 000000000..9d6dfdf76 --- /dev/null +++ b/changelog/3806.changed.2.md @@ -0,0 +1 @@ +- Improved Ultravox TTFB measurement accuracy by using VAD speech end time instead of `UserStoppedSpeakingFrame` timing. diff --git a/changelog/3806.changed.md b/changelog/3806.changed.md new file mode 100644 index 000000000..c8e2fb68c --- /dev/null +++ b/changelog/3806.changed.md @@ -0,0 +1 @@ +- Aligned `UltravoxRealtimeLLMService` frame handling with OpenAI/Gemini realtime services: added `InterruptionFrame` handling with metrics cleanup, processing metrics at response boundaries, and improved agent transcript handling for both voice and text output modalities. 
diff --git a/examples/foundational/50-ultravox-realtime.py b/examples/foundational/50-ultravox-realtime.py index 5038cbb4c..0908c518c 100644 --- a/examples/foundational/50-ultravox-realtime.py +++ b/examples/foundational/50-ultravox-realtime.py @@ -12,11 +12,18 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, + LLMUserAggregatorParams, + UserTurnStoppedMessage, +) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.llm_service import FunctionCallParams @@ -24,6 +31,8 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies # Load environment variables load_dotenv(override=True) @@ -168,8 +177,21 @@ There is also a secret menu that changes daily. If the user asks about it, use t llm.register_function("get_secret_menu", get_secret_menu) - # Necessary to complete the function call lifecycle in Pipecat. 
- user_aggregator, assistant_aggregator = LLMContextAggregatorPair(LLMContext([])) + context = LLMContext([]) + + # Necessary to complete the function call lifecycle in Pipecat and + # to produce user and assistant turn stopped events. + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[SpeechTimeoutUserTurnStopStrategy()], + ), + # Set the VAD analyzer to create reliable TTFB measurements and + # user stop events. + vad_analyzer=SileroVADAnalyzer(), + ), + ) # Build the pipeline pipeline = Pipeline( @@ -177,8 +199,8 @@ There is also a secret menu that changes daily. If the user asks about it, use t transport.input(), user_aggregator, llm, - assistant_aggregator, transport.output(), + assistant_aggregator, ] ) @@ -203,6 +225,18 @@ There is also a secret menu that changes daily. If the user asks about it, use t logger.info(f"Client disconnected") await task.cancel() + @user_aggregator.event_handler("on_user_turn_stopped") + async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}user: {message.content}" + logger.info(f"Transcript: {line}") + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + # Run the pipeline runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) await runner.run(task) diff --git a/examples/foundational/50a-ultravox-realtime-text.py b/examples/foundational/50a-ultravox-realtime-text.py new file mode 100644 index 000000000..8b876048a --- /dev/null +++ b/examples/foundational/50a-ultravox-realtime-text.py @@ -0,0 +1,263 @@ +# +# Copyright (c) 
2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import datetime +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, + LLMUserAggregatorParams, + UserTurnStoppedMessage, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.inworld.tts import InworldTTSService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +# Load environment variables +load_dotenv(override=True) + + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def get_secret_menu(params: FunctionCallParams): + category = params.arguments.get("category", "both") + logger.debug(f"Fetching secret menu with category: {category}") + items = [] + if category in {"donuts", "both"}: + items.append( + { + "name": "Butter Pecan Ice Cream (one scoop)", + "price": "$2.99", + } + ) + if category in {"drinks", "both"}: + items.append( + { + "name": "Banana Smoothie", + "price": "$4.99", + } + ) + await params.result_callback( + { + "date": datetime.date.today().isoformat(), + "items": items, + } + ) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + system_prompt = f""" +You are a drive-thru order taker for a donut shop called "Dr. Donut". Local time is currently: {datetime.datetime.now().isoformat()} +The user is talking to you over voice on their phone, and your response will be read out loud with realistic text-to-speech (TTS) technology. + +Follow every direction here when crafting your response: + +1. Use natural, conversational language that is clear and easy to follow (short sentences, simple words). +1a. Be concise and relevant: Most of your responses should be a sentence or two, unless you're asked to go deeper. Don't monopolize the conversation. +1b. Use discourse markers to ease comprehension. Never use the list format. + +2. Keep the conversation flowing. +2a. Clarify: when there is ambiguity, ask clarifying questions, rather than make assumptions. +2b. Don't implicitly or explicitly try to end the chat (i.e. do not end a response with "Talk soon!", or "Enjoy!"). +2c. Sometimes the user might just want to chat. Ask them relevant follow-up questions. 
+2d. Don't ask them if there's anything else they need help with (e.g. don't say things like "How can I assist you further?"). + +3. Remember that this is a voice conversation: +3a. Don't use lists, markdown, bullet points, or other formatting that's not typically spoken. +3b. Type out numbers in words (e.g. 'twenty twelve' instead of the year 2012) +3c. If something doesn't make sense, it's likely because you misheard them. There wasn't a typo, and the user didn't mispronounce anything. + +Remember to follow these rules absolutely, and do not refer to these rules, even if you're asked about them. + +When talking with the user, use the following script: +1. Take their order, acknowledging each item as it is ordered. If it's not clear which menu item the user is ordering, ask them to clarify. + DO NOT add an item to the order unless it's one of the items on the menu below. +2. Once the order is complete, repeat back the order. +2a. If the user only ordered a drink, ask them if they would like to add a donut to their order. +2b. If the user only ordered donuts, ask them if they would like to add a drink to their order. +2c. If the user ordered both drinks and donuts, don't suggest anything. +3. Total up the price of all ordered items and inform the user. +4. Ask the user to pull up to the drive thru window. +If the user asks for something that's not on the menu, inform them of that fact, and suggest the most similar item on the menu. +If the user says something unrelated to your role, responed with "Um... this is a Dr. Donut." +If the user says "thank you", respond with "My pleasure." +If the user asks about what's on the menu, DO NOT read the entire menu to them. Instead, give a couple suggestions. 
+ +The menu of available items is as follows: + +# DONUTS + +PUMPKIN SPICE ICED DOUGHNUT $1.29 +PUMPKIN SPICE CAKE DOUGHNUT $1.29 +OLD FASHIONED DOUGHNUT $1.29 +CHOCOLATE ICED DOUGHNUT $1.09 +CHOCOLATE ICED DOUGHNUT WITH SPRINKLES $1.09 +RASPBERRY FILLED DOUGHNUT $1.09 +BLUEBERRY CAKE DOUGHNUT $1.09 +STRAWBERRY ICED DOUGHNUT WITH SPRINKLES $1.09 +LEMON FILLED DOUGHNUT $1.09 +DOUGHNUT HOLES $3.99 + +# COFFEE & DRINKS + +PUMPKIN SPICE COFFEE $2.59 +PUMPKIN SPICE LATTE $4.59 +REGULAR BREWED COFFEE $1.79 +DECAF BREWED COFFEE $1.79 +LATTE $3.49 +CAPPUCINO $3.49 +CARAMEL MACCHIATO $3.49 +MOCHA LATTE $3.49 +CARAMEL MOCHA LATTE $3.49 + +There is also a secret menu that changes daily. If the user asks about it, use the get_secret_menu tool to look up today's secret menu items. +""" + + secret_menu_function = FunctionSchema( + name="get_secret_menu", + description="Get today's secret menu items", + properties={ + "category": { + "type": "string", + "enum": ["donuts", "drinks", "both"], + "description": "The category of secret menu items to retrieve. Defaults to both.", + }, + }, + required=[], + ) + + llm = UltravoxRealtimeLLMService( + params=OneShotInputParams( + api_key=os.getenv("ULTRAVOX_API_KEY"), + system_prompt=system_prompt, + temperature=0.3, + max_duration=datetime.timedelta(minutes=3), + output_medium="text", + ), + one_shot_selected_tools=ToolsSchema(standard_tools=[secret_menu_function]), + ) + + llm.register_function("get_secret_menu", get_secret_menu) + + tts = InworldTTSService( + api_key=os.getenv("INWORLD_API_KEY", ""), + voice_id="Ashley", + model="inworld-tts-1", + temperature=1.1, + ) + + context = LLMContext([]) + + # Necessary to complete the function call lifecycle in Pipecat and + # to produce user and assistant turn stopped events. 
+ user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[SpeechTimeoutUserTurnStopStrategy()], + ), + # Set the VAD analyzer to emulate timing of the model. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + ) + + # Build the pipeline + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + # Configure the pipeline task + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + # Handle client connection event + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + + # Handle client disconnection events + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + @user_aggregator.event_handler("on_user_turn_stopped") + async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}user: {message.content}" + logger.info(f"Transcript: {line}") + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + # Run the pipeline + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, 
transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index d14c3b9ca..07c3c34fe 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -31,6 +31,7 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputTextRawFrame, + InterruptionFrame, LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, @@ -42,7 +43,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, TTSTextFrame, UserAudioRawFrame, - UserStoppedSpeakingFrame, + VADUserStoppedSpeakingFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response import ( @@ -90,6 +91,9 @@ class AgentInputParams(BaseModel): template_context: Context variables to use when instantiating a call with the agent. Defaults to an empty dict. metadata: Metadata to attach to the call. Default to an empty dict. + output_medium: The initial output medium for the agent. Use "text" for text + responses or "voice" for audio responses. Defaults to None, which uses the + agent's default. max_duration: The maximum duration of the call. Defaults to None, which will use the agent's default maximum duration. extra: Extra parameters to include in the agent call creation request. Defaults @@ -101,6 +105,7 @@ class AgentInputParams(BaseModel): agent_id: uuid.UUID template_context: Dict[str, Any] = Field(default_factory=dict) metadata: Dict[str, str] = Field(default_factory=dict) + output_medium: Optional[Literal["text", "voice"]] = None max_duration: Optional[datetime.timedelta] = Field( default=None, ge=datetime.timedelta(seconds=10), le=datetime.timedelta(hours=1) ) @@ -117,6 +122,8 @@ class OneShotInputParams(BaseModel): model: Model identifier to use. Defaults to "fixie-ai/ultravox". voice: Voice identifier for speech generation. 
Defaults to None. metadata: Metadata to attach to the call. Default to an empty dict. + output_medium: The initial output medium for the agent. Use "text" for text + responses or "voice" for audio responses. Defaults to None (voice). max_duration: The maximum duration of the call. Defaults to one hour. extra: Extra parameters to include in the call creation request. Defaults to an empty dict. See the Ultravox API documentation for valid arguments: @@ -129,6 +136,7 @@ class OneShotInputParams(BaseModel): model: Optional[str] = None voice: Optional[uuid.UUID] = None metadata: Dict[str, str] = Field(default_factory=dict) + output_medium: Optional[Literal["text", "voice"]] = None max_duration: datetime.timedelta = Field( default=datetime.timedelta(hours=1), ge=datetime.timedelta(seconds=10), @@ -210,6 +218,14 @@ class UltravoxRealtimeLLMService(LLMService): self._sample_rate = 48000 self._resampler = create_stream_resampler() + def can_generate_metrics(self) -> bool: + """Check if the service can generate usage metrics. + + Returns: + True if metrics generation is supported. 
+ """ + return True + # # standard AIService frame handling # @@ -237,6 +253,14 @@ class UltravoxRealtimeLLMService(LLMService): except Exception as e: await self.push_error("Failed to connect to Ultravox", e, fatal=True) + @staticmethod + def _output_medium_to_api(medium: Optional[Literal["text", "voice"]]) -> Optional[str]: + if medium == "text": + return "MESSAGE_MEDIUM_TEXT" + elif medium == "voice": + return "MESSAGE_MEDIUM_VOICE" + return None + async def _start_agent_call(self, params: AgentInputParams) -> str: request_body = { "templateContext": params.template_context, @@ -247,6 +271,9 @@ class UltravoxRealtimeLLMService(LLMService): } }, } + initial_output_medium = self._output_medium_to_api(params.output_medium) + if initial_output_medium: + request_body["initialOutputMedium"] = initial_output_medium if params.max_duration: request_body["maxDuration"] = f"{params.max_duration.total_seconds():3f}s" request_body = request_body | params.extra @@ -277,7 +304,11 @@ class UltravoxRealtimeLLMService(LLMService): "inputSampleRate": self._sample_rate, } }, - } | params.extra + } + initial_output_medium = self._output_medium_to_api(params.output_medium) + if initial_output_medium: + request_body["initialOutputMedium"] = initial_output_medium + request_body = request_body | params.extra async with aiohttp.ClientSession() as session: async with session.post( "https://api.ultravox.ai/api/calls", @@ -367,18 +398,17 @@ class UltravoxRealtimeLLMService(LLMService): else LLMContext.from_openai_context(frame.context) ) await self._handle_context(context) + elif isinstance(frame, InterruptionFrame): + await self.stop_all_metrics() + await self.push_frame(frame, direction) elif isinstance(frame, InputTextRawFrame): await self._send_user_text(frame.text) await self.push_frame(frame, direction) elif isinstance(frame, InputAudioRawFrame): await self._send_user_audio(frame) await self.push_frame(frame, direction) - elif isinstance(frame, UserStoppedSpeakingFrame): - # This may 
or may not align with Ultravox's end of user speech detection, - # which relies on a more complex endpointing model. In particular it will - # yield a seemingly very slow TTFB in the case of endpointing false - # negatives. It will be close in the majority of cases though. - await self.start_ttfb_metrics() + elif isinstance(frame, VADUserStoppedSpeakingFrame): + await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) else: await self.push_frame(frame, direction) @@ -399,6 +429,25 @@ class UltravoxRealtimeLLMService(LLMService): } await self._send(socket_message) + async def _handle_vad_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): + """Handle VAD user stopped speaking frame. + + Calculates the approximate speech end time and starts the TTFB metric + from that point instead of from the time this frame is processed. + + Args: + frame: The VAD user stopped speaking frame. + """ + # Skip TTFB measurement if stop_secs is not set + if frame.stop_secs == 0.0: + return + + # Calculate the actual speech end time (current time minus VAD stop delay). + # This approximates when the last user audio was sent to the Ultravox service, + # which we use to measure against the eventual agent response. 
+ speech_end_time = frame.timestamp - frame.stop_secs + await self.start_ttfb_metrics(start_time=speech_end_time) + async def _send_user_audio(self, frame: InputAudioRawFrame): """Send user audio frame to Ultravox Realtime.""" if not self._socket: @@ -502,6 +551,7 @@ class UltravoxRealtimeLLMService(LLMService): if not audio: return if not self._bot_responding: + await self.start_processing_metrics() await self.stop_ttfb_metrics() await self.push_frame(LLMFullResponseStartFrame()) await self.push_frame(TTSStartedFrame()) @@ -509,6 +559,7 @@ class UltravoxRealtimeLLMService(LLMService): await self.push_frame(TTSAudioRawFrame(audio, self._sample_rate, 1)) async def _handle_response_end(self): + await self.stop_processing_metrics() if self._bot_responding == "voice": await self.push_frame(TTSStoppedFrame()) await self.push_frame(LLMFullResponseEndFrame()) @@ -542,22 +593,29 @@ class UltravoxRealtimeLLMService(LLMService): async def _handle_agent_transcript( self, medium: str, text: Optional[str], delta: Optional[str], final: bool ): - if text or delta: - frame = LLMTextFrame(text=text or delta) - frame.skip_tts = medium == "voice" - await self.push_frame(frame) - if medium == "text": - if text: - await self.stop_ttfb_metrics() - await self.push_frame(LLMFullResponseStartFrame()) - await self.push_frame(TTSStartedFrame()) - await self.push_frame(TTSTextFrame(text=text, aggregated_by=AggregationType.WORD)) - self._bot_responding = "text" - elif final: + if medium == "voice": + # In voice mode, audio is handled by _handle_audio(). Here we push + # text transcripts of the audio for downstream consumers. 
+ if (text or delta) and not final: + frame = LLMTextFrame(text=text or delta) + frame.append_to_context = False + await self.push_frame(frame) + if delta: + tts_frame = TTSTextFrame(text=delta, aggregated_by=AggregationType.WORD) + tts_frame.includes_inter_frame_spaces = True + await self.push_frame(tts_frame) + elif medium == "text": + if final: + await self.stop_processing_metrics() await self.push_frame(LLMFullResponseEndFrame()) self._bot_responding = None - elif delta: - await self.push_frame(TTSTextFrame(text=delta, aggregated_by=AggregationType.WORD)) + elif text or delta: + if not self._bot_responding: + await self.start_processing_metrics() + await self.stop_ttfb_metrics() + await self.push_frame(LLMFullResponseStartFrame()) + self._bot_responding = "text" + await self.push_frame(LLMTextFrame(text=text or delta)) def create_context_aggregator( self, From faed775d9075064af3804be89f94322c36af2c8e Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 26 Feb 2026 11:02:44 -0500 Subject: [PATCH 121/189] Extract `_DeepgramSTTSettingsBase` with shared `_merge_live_options_delta` to deduplicate LiveOptions merge logic between `__init__` and `apply_update`, and between the Deepgram STT and SageMaker variants; make top-level model/language take precedence over conflicting live_options values in updates; remove unnecessary Language enum-to-string conversion (Language is a StrEnum) --- src/pipecat/services/deepgram/stt.py | 131 ++++++++++------ .../services/deepgram/stt_sagemaker.py | 145 ++---------------- tests/test_settings.py | 72 +++++---- 3 files changed, 134 insertions(+), 214 deletions(-) diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 59107d0f8..497d6aae1 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -49,19 +49,20 @@ except ModuleNotFoundError as e: @dataclass -class DeepgramSTTSettings(STTSettings): - """Settings for the Deepgram STT service. 
+class _DeepgramSTTSettingsBase(STTSettings): + """Base settings for Deepgram STT services that use ``LiveOptions``. + + Shared by ``DeepgramSTTSettings`` and ``DeepgramSageMakerSTTSettings``. + Not intended for other Deepgram services that don't use ``LiveOptions``. Wraps the Deepgram SDK's ``LiveOptions`` in a single ``live_options`` - field. All Deepgram-specific options (``filler_words``, ``diarize``, - ``utterance_end_ms``, etc.) should be passed directly via - ``LiveOptions``. + field and provides delta-merge semantics: when used as a delta (e.g. + via ``STTUpdateSettingsFrame``), only the non-None fields of + ``live_options`` are merged into the stored options rather than + replacing them wholesale. - In **delta mode** (i.e. when carried by ``STTUpdateSettingsFrame``), - ``live_options`` is treated as a **delta** — its non-None fields are - merged into the stored ``LiveOptions``, not replaced wholesale. For - example, ``DeepgramSTTSettings(live_options=LiveOptions(punctuate=False))`` - changes only ``punctuate`` and leaves all other options intact. + ``model`` and ``language`` are kept in sync bidirectionally between + the top-level settings fields and the nested ``live_options``. Parameters: live_options: Deepgram ``LiveOptions`` for STT configuration. @@ -83,12 +84,56 @@ class DeepgramSTTSettings(STTSettings): } return cls._live_options_params + def _merge_live_options_delta(self, delta: LiveOptions) -> Dict[str, Any]: + """Merge a ``LiveOptions`` delta into the stored ``live_options``. + + Non-None fields from *delta* overwrite corresponding fields in the + stored ``LiveOptions``. ``model`` and ``language`` are synced to + the top-level settings fields when they change. + + Args: + delta: A ``LiveOptions`` whose non-None fields are the desired + overrides. + + Returns: + Dict mapping each changed key to its **previous** value (same + contract as ``apply_update``). 
+ """ + old_dict = self.live_options.to_dict() # type: ignore[union-attr] + delta_dict = delta.to_dict() + + # Deepgram SDK bug: model initialised to the *string* "None". + if delta_dict.get("model") == "None": + del delta_dict["model"] + + if not delta_dict: + return {} + + merged = {**old_dict, **delta_dict} + self.live_options = LiveOptions(**merged) + + # Track what changed. + changed: Dict[str, Any] = {} + for key in delta_dict: + old_val = old_dict.get(key, NOT_GIVEN) + if old_val != delta_dict[key]: + changed[key] = old_val + + # Sync model/language from live_options delta to top-level fields. + if "model" in delta_dict and delta_dict["model"] != self.model: + changed.setdefault("model", self.model) + self.model = delta_dict["model"] + if "language" in delta_dict and delta_dict["language"] != self.language: + changed.setdefault("language", self.language) + self.language = delta_dict["language"] + + return changed + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: """Merge a delta into this store, with delta-merge for ``live_options``. - ``live_options`` is merged field-by-field (non-None fields from the - delta overwrite corresponding fields in the stored options) rather - than being replaced wholesale. + ``live_options`` is merged field-by-field via + ``_merge_live_options_delta`` rather than being replaced wholesale. ``model`` and ``language`` are kept in sync bidirectionally between the top-level settings fields and ``live_options``. @@ -107,27 +152,17 @@ class DeepgramSTTSettings(STTSettings): if "language" in changed: self.live_options.language = self.language # type: ignore[union-attr] - # Merge live_options delta. + # Merge live_options delta. Top-level model/language take precedence + # over conflicting values in live_options, so write them into the + # delta before merging. 
if is_given(delta_lo): - old_dict = self.live_options.to_dict() # type: ignore[union-attr] - delta_dict = delta_lo.to_dict() + if "model" in changed: + delta_lo.model = self.model + if "language" in changed: + delta_lo.language = self.language - if delta_dict: - merged = {**old_dict, **delta_dict} - self.live_options = LiveOptions(**merged) - - for key in delta_dict: - old_val = old_dict.get(key, NOT_GIVEN) - if old_val != delta_dict[key]: - changed[key] = old_val - - # Sync model/language from live_options delta to top-level. - if "model" in delta_dict and delta_dict["model"] != self.model: - changed.setdefault("model", self.model) - self.model = delta_dict["model"] - if "language" in delta_dict and delta_dict["language"] != self.language: - changed.setdefault("language", self.language) - self.language = delta_dict["language"] + for key, old_val in self._merge_live_options_delta(delta_lo).items(): + changed.setdefault(key, old_val) return changed @@ -165,6 +200,16 @@ class DeepgramSTTSettings(STTSettings): return instance +@dataclass +class DeepgramSTTSettings(_DeepgramSTTSettingsBase): + """Settings for the Deepgram STT service. + + See ``_DeepgramSTTSettingsBase`` for full documentation. + """ + + pass + + class DeepgramSTTService(STTService): """Deepgram speech-to-text service. @@ -250,25 +295,13 @@ class DeepgramSTTService(STTService): vad_events=False, ) - merged_dict = default_options.to_dict() - if live_options: - default_model = default_options.model - merged_dict.update(live_options.to_dict()) - # NOTE(aleix): Fixes a bug in deepgram-sdk where `model` is initialized - # to the string "None" instead of the value `None`. 
- if "model" in merged_dict and merged_dict["model"] == "None": - merged_dict["model"] = default_model - - if "language" in merged_dict and isinstance(merged_dict["language"], Language): - merged_dict["language"] = merged_dict["language"].value - - # Sync model/language to top-level STTSettings fields - model = merged_dict.get("model") - language = merged_dict.get("language") - settings = DeepgramSTTSettings( - model=model, language=language, live_options=LiveOptions(**merged_dict) + model=default_options.model, + language=default_options.language, + live_options=default_options, ) + if live_options: + settings._merge_live_options_delta(live_options) super().__init__( sample_rate=sample_rate, diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index ee2121bec..ba4b7dfda 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -13,10 +13,9 @@ languages, and various Deepgram features. 
""" import asyncio -import inspect import json -from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, Mapping, Optional, Type +from dataclasses import dataclass +from typing import Any, AsyncGenerator, Dict, Optional from loguru import logger @@ -33,7 +32,8 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient -from pipecat.services.settings import _S, NOT_GIVEN, STTSettings, _NotGiven, is_given +from pipecat.services.deepgram.stt import _DeepgramSTTSettingsBase +from pipecat.services.settings import STTSettings from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -51,120 +51,13 @@ except ModuleNotFoundError as e: @dataclass -class DeepgramSageMakerSTTSettings(STTSettings): +class DeepgramSageMakerSTTSettings(_DeepgramSTTSettingsBase): """Settings for the Deepgram SageMaker STT service. - Wraps the Deepgram SDK's ``LiveOptions`` in a single ``live_options`` - field. All Deepgram-specific options (``filler_words``, ``diarize``, - ``utterance_end_ms``, etc.) should be passed directly via - ``LiveOptions``. - - In **delta mode** (i.e. when carried by ``STTUpdateSettingsFrame``), - ``live_options`` is treated as a **delta** — its non-None fields are - merged into the stored ``LiveOptions``, not replaced wholesale. For - example, ``DeepgramSageMakerSTTSettings(live_options=LiveOptions(punctuate=False))`` - changes only ``punctuate`` and leaves all other options intact. - - Parameters: - live_options: Deepgram ``LiveOptions`` for STT configuration. - In delta mode only its non-None fields are merged into the - stored options. + See ``_DeepgramSTTSettingsBase`` for full documentation. 
""" - live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - - # Valid LiveOptions __init__ parameter names (cached at class level). - _live_options_params: set[str] | None = field(default=None, init=False, repr=False) - - @classmethod - def _get_live_options_params(cls) -> set[str]: - """Return the set of valid ``LiveOptions.__init__`` parameter names.""" - if cls._live_options_params is None: - cls._live_options_params = set(inspect.signature(LiveOptions.__init__).parameters) - { - "self" - } - return cls._live_options_params - - def apply_update(self: _S, delta: _S) -> Dict[str, Any]: - """Merge a delta into this store, with delta-merge for ``live_options``. - - ``live_options`` is merged field-by-field (non-None fields from the - delta overwrite corresponding fields in the stored options) rather - than being replaced wholesale. - - ``model`` and ``language`` are kept in sync bidirectionally between - the top-level settings fields and ``live_options``. - """ - # Pull live_options out of the delta so super() doesn't replace it. - delta_lo = getattr(delta, "live_options", NOT_GIVEN) - if is_given(delta_lo): - delta.live_options = NOT_GIVEN # type: ignore[assignment] - - # Let the base class handle model, language, extra. - changed = super().apply_update(delta) - - # Sync top-level model/language changes into stored live_options. - if "model" in changed: - self.live_options.model = self.model # type: ignore[union-attr] - if "language" in changed: - self.live_options.language = self.language # type: ignore[union-attr] - - # Merge live_options delta. 
- if is_given(delta_lo): - old_dict = self.live_options.to_dict() # type: ignore[union-attr] - delta_dict = delta_lo.to_dict() - - if delta_dict: - merged = {**old_dict, **delta_dict} - self.live_options = LiveOptions(**merged) - - for key in delta_dict: - old_val = old_dict.get(key, NOT_GIVEN) - if old_val != delta_dict[key]: - changed[key] = old_val - - # Sync model/language from live_options delta to top-level. - if "model" in delta_dict and delta_dict["model"] != self.model: - changed.setdefault("model", self.model) - self.model = delta_dict["model"] - if "language" in delta_dict and delta_dict["language"] != self.language: - changed.setdefault("language", self.language) - self.language = delta_dict["language"] - - return changed - - @classmethod - def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: - """Build a delta from a plain dict, routing LiveOptions keys correctly. - - Keys that are valid ``LiveOptions.__init__`` parameters (and not - top-level ``STTSettings`` fields like ``model`` / ``language``) are - collected into a ``LiveOptions`` object. ``model`` and ``language`` - are routed to the top-level settings fields. Truly unknown keys go - to ``extra``. 
- """ - lo_params = cls._get_live_options_params() - stt_field_names = {"model", "language"} - - kwargs: Dict[str, Any] = {} - lo_kwargs: Dict[str, Any] = {} - extra: Dict[str, Any] = {} - - for key, value in settings.items(): - canonical = cls._aliases.get(key, key) - if canonical in stt_field_names: - kwargs[canonical] = value - elif canonical in lo_params: - lo_kwargs[canonical] = value - else: - extra[key] = value - - if lo_kwargs: - kwargs["live_options"] = LiveOptions(**lo_kwargs) - - instance = cls(**kwargs) - instance.extra = extra - return instance + pass class DeepgramSageMakerSTTService(STTService): @@ -224,7 +117,6 @@ class DeepgramSageMakerSTTService(STTService): """ sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - # Create default options similar to DeepgramSTTService default_options = LiveOptions( encoding="linear16", language=Language.EN, @@ -234,26 +126,13 @@ class DeepgramSageMakerSTTService(STTService): punctuate=True, ) - # Merge with provided options - merged_dict = default_options.to_dict() - if live_options: - default_model = default_options.model - merged_dict.update(live_options.to_dict()) - # Handle the "None" string bug from deepgram-sdk - if "model" in merged_dict and merged_dict["model"] == "None": - merged_dict["model"] = default_model - - # Convert Language enum to string if needed - if "language" in merged_dict and isinstance(merged_dict["language"], Language): - merged_dict["language"] = merged_dict["language"].value - - # Sync model/language to top-level STTSettings fields - model = merged_dict.get("model") - language = merged_dict.get("language") - settings = DeepgramSageMakerSTTSettings( - model=model, language=language, live_options=LiveOptions(**merged_dict) + model=default_options.model, + language=default_options.language, + live_options=default_options, ) + if live_options: + settings._merge_live_options_delta(live_options) super().__init__( sample_rate=sample_rate, diff --git 
a/tests/test_settings.py b/tests/test_settings.py index 3201e3c24..47cb6e4cf 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -407,6 +407,36 @@ class TestDeepgramSTTSettingsApplyUpdate: changed = current.apply_update(delta) assert changed == {} + def test_apply_update_top_level_model_takes_precedence_over_live_options(self): + """When both top-level model and live_options.model are set, top-level wins.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings( + model="nova-2", + live_options=LiveOptions(model="nova-3"), + ) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + def test_apply_update_top_level_language_takes_precedence_over_live_options(self): + """When both top-level language and live_options.language are set, top-level wins.""" + current = self._make_store() + assert current.language == "en" + + delta = DeepgramSTTSettings( + language="fr", + live_options=LiveOptions(language="es"), + ) + changed = current.apply_update(delta) + + assert current.language == "fr" + assert current.live_options.language == "fr" + assert "language" in changed + class TestDeepgramSTTSettingsFromMapping: def test_routes_live_options_kwargs(self): @@ -482,43 +512,21 @@ class TestDeepgramSTTSettingsFromMapping: # --------------------------------------------------------------------------- -# DeepgramSageMakerSTTSettings: same pattern +# DeepgramSageMakerSTTSettings: smoke test that the shared base is inherited # --------------------------------------------------------------------------- -class TestDeepgramSageMakerSTTSettingsApplyUpdate: - def _make_store(self, **lo_kwargs) -> DeepgramSageMakerSTTSettings: - defaults = dict( - encoding="linear16", - channels=1, - interim_results=True, - punctuate=True, - ) - defaults.update(lo_kwargs) - return DeepgramSageMakerSTTSettings( +class 
TestDeepgramSageMakerSTTSettings: + def test_inherits_live_options_behavior(self): + """Smoke test: SageMaker settings inherit the shared base correctly.""" + store = DeepgramSageMakerSTTSettings( model="nova-3", language="en", - live_options=LiveOptions(**defaults), + live_options=LiveOptions(encoding="linear16", channels=1, punctuate=True), ) - - def test_apply_update_merges_live_options_as_delta(self): - current = self._make_store() delta = DeepgramSageMakerSTTSettings(live_options=LiveOptions(punctuate=False)) - changed = current.apply_update(delta) - assert current.live_options.punctuate is False + changed = store.apply_update(delta) + + assert store.live_options.punctuate is False + assert store.live_options.encoding == "linear16" assert "punctuate" in changed - assert current.live_options.encoding == "linear16" - - def test_apply_update_syncs_model_from_live_options(self): - current = self._make_store() - delta = DeepgramSageMakerSTTSettings(live_options=LiveOptions(model="nova-2")) - current.apply_update(delta) - assert current.model == "nova-2" - - def test_from_mapping_routes_correctly(self): - delta = DeepgramSageMakerSTTSettings.from_mapping( - {"model": "nova-2", "punctuate": False, "unknown": "val"} - ) - assert delta.model == "nova-2" - assert delta.live_options.punctuate is False - assert delta.extra == {"unknown": "val"} From fff9db0d8f148954a518b5848de3253b7d433369 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Thu, 26 Feb 2026 13:51:05 -0800 Subject: [PATCH 122/189] Remove verbose audio chunk logging from GenesysAudioHookSerializer Fixes #3777 --- src/pipecat/serializers/genesys.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pipecat/serializers/genesys.py b/src/pipecat/serializers/genesys.py index 24b68eb81..a25287b5c 100644 --- a/src/pipecat/serializers/genesys.py +++ b/src/pipecat/serializers/genesys.py @@ -642,7 +642,6 @@ class GenesysAudioHookSerializer(FrameSerializer): """ # Binary data = audio if isinstance(data, bytes): - 
logger.debug(f"[AUDIO IN] Received {len(data)} bytes from Genesys") return await self._deserialize_audio(data) # Text data = JSON control message From bbaa79fef038d50434d3c5008226418c85ce140f Mon Sep 17 00:00:00 2001 From: Rupesh Date: Thu, 26 Feb 2026 14:00:34 -0800 Subject: [PATCH 123/189] Add changelog for PR #3850 --- changelog/3850.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3850.fixed.md diff --git a/changelog/3850.fixed.md b/changelog/3850.fixed.md new file mode 100644 index 000000000..cfbdc6cf7 --- /dev/null +++ b/changelog/3850.fixed.md @@ -0,0 +1 @@ +- Removed verbose per-chunk audio logging from `GenesysAudioHookSerializer` that flooded production logs. From 3e04f5d05f7095fbf7c51392ab5de711afca95f7 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 09:18:15 -0500 Subject: [PATCH 124/189] Add GitHub Actions workflow to auto-update docs on PR merge Runs Claude Code Action after PRs merge to main when source files in services/transports/serializers/processors/audio/turns/observers/pipeline are changed. Creates a docs PR on pipecat-ai/docs with targeted edits following the existing update-docs skill instructions. 
--- .github/workflows/update-docs.yml | 154 ++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .github/workflows/update-docs.yml diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml new file mode 100644 index 000000000..03323e87f --- /dev/null +++ b/.github/workflows/update-docs.yml @@ -0,0 +1,154 @@ +name: Update Documentation on PR Merge + +on: + pull_request: + types: [closed] + branches: [main] + paths: + - "src/pipecat/services/**" + - "src/pipecat/transports/**" + - "src/pipecat/serializers/**" + - "src/pipecat/processors/**" + - "src/pipecat/audio/**" + - "src/pipecat/turns/**" + - "src/pipecat/observers/**" + - "src/pipecat/pipeline/**" + workflow_dispatch: + inputs: + pr_number: + description: "PR number to generate docs for" + required: true + type: string + +jobs: + update-docs: + if: >- + github.event_name == 'workflow_dispatch' || + github.event.pull_request.merged == true + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + pull-requests: read + id-token: write + steps: + - name: Checkout pipecat + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Checkout docs + uses: actions/checkout@v4 + with: + repository: pipecat-ai/docs + token: ${{ secrets.DOCS_SYNC_TOKEN }} + path: _docs + + - name: Get version info + id: version + run: | + echo "release=$(git describe --tags --abbrev=0 2>/dev/null || echo 'unknown')" >> "$GITHUB_OUTPUT" + echo "dev=$(git describe --tags 2>/dev/null || echo 'unknown')" >> "$GITHUB_OUTPUT" + + - name: Resolve PR number + id: pr + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "number=${{ inputs.pr_number }}" >> "$GITHUB_OUTPUT" + else + echo "number=${{ github.event.pull_request.number }}" >> "$GITHUB_OUTPUT" + fi + + - name: Update documentation + uses: anthropics/claude-code-action@v1 + env: + DOCS_SYNC_TOKEN: ${{ secrets.DOCS_SYNC_TOKEN }} + with: + anthropic_api_key: ${{ 
secrets.ANTHROPIC_API_KEY }} + prompt: | + You are updating documentation for the pipecat-ai/docs repository based on + changes merged in PR #${{ steps.pr.outputs.number }} of pipecat-ai/pipecat. + + ## Setup + + 1. Read the skill instructions at `.claude/skills/update-docs/SKILL.md` + 2. Read the source-to-doc mapping at `.claude/skills/update-docs/SOURCE_DOC_MAPPING.md` + 3. The docs repository is checked out at `./_docs/` + 4. Current pipecat version: ${{ steps.version.outputs.release }} (release), ${{ steps.version.outputs.dev }} (dev) + + ## Get the diff + + Run `gh pr diff ${{ steps.pr.outputs.number }}` to see what changed in the PR. + Also run `gh pr diff ${{ steps.pr.outputs.number }} --name-only` to get the list of changed files. + Filter to source files matching the directories listed in SKILL.md Step 3. + + If no relevant source files were changed, exit with "No documentation changes needed." + + ## Follow the skill instructions + + Apply the SKILL.md workflow (Steps 3-9) with these adaptations for automation: + + ### Docs path + Use `./_docs/` — it's already checked out. Do not ask for a path. + + ### Branch management + - Branch name: `docs/pr-${{ steps.pr.outputs.number }}` + - Work inside `./_docs/` for all doc edits and git operations + - Check if the branch already exists on the remote: + ```bash + cd _docs && git fetch origin docs/pr-${{ steps.pr.outputs.number }} 2>/dev/null + ``` + - If it exists: check it out (supports workflow re-runs) + - If not: create it from main + + ### Git config + Before committing in `_docs`, set: + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + ``` + + ### No interactive questions + Do not ask questions. If you encounter gaps (unmapped files, missing sections, + ambiguous changes), note them in the PR body under "## Gaps identified". 
+ + ### Creating the docs PR + After committing all changes in `_docs`, push and create a PR: + ```bash + cd _docs + git push -u origin docs/pr-${{ steps.pr.outputs.number }} + GH_TOKEN=$DOCS_SYNC_TOKEN gh pr create \ + --repo pipecat-ai/docs \ + --title "docs: update for pipecat PR #${{ steps.pr.outputs.number }}" \ + --body "$(cat <<'BODY' + Automated documentation update for [pipecat PR #${{ steps.pr.outputs.number }}](https://github.com/pipecat-ai/pipecat/pull/${{ steps.pr.outputs.number }}). + + Pipecat version: ${{ steps.version.outputs.release }} (${{ steps.version.outputs.dev }}) + + ## Changes + + + ## Gaps identified + + BODY + )" + ``` + + ### Re-run handling + If `gh pr create` fails because a PR from that branch already exists, + push the updated commits and use `gh pr edit` to update the body instead. + + ### No-op + If after analyzing the diff you determine no documentation changes are needed + (e.g., only skip-listed files changed, or changes don't affect public API docs), + exit cleanly without creating a branch or PR. Output "No documentation changes needed." + + ## Important rules + - Only modify files inside `./_docs/` — never modify pipecat source code + - Follow the conservative editing rules from SKILL.md Step 6 + - Read each doc page fully before editing (SKILL.md Guidelines) + - Use `GH_TOKEN=$DOCS_SYNC_TOKEN` for all `gh` commands targeting pipecat-ai/docs + claude_args: | + --model claude-sonnet-4-5-20250929 + --max-turns 30 + --allowedTools "Read,Write,Edit,Glob,Grep,Bash" From c259a6a73bcc8ad4b571aa974f4f7f078d5380af Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 19:52:34 -0500 Subject: [PATCH 125/189] Deprecate processing metrics (ProcessingMetricsData) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add deprecation warnings to start_processing_metrics() and stop_processing_metrics() on FrameProcessorMetrics and FrameProcessor. 
Mark ProcessingMetricsData as deprecated in docstring. All existing behavior is preserved — the warnings inform users that these will be removed in a future version. --- changelog/3852.deprecated.md | 1 + src/pipecat/metrics/metrics.py | 4 ++++ src/pipecat/processors/frame_processor.py | 16 ++++++++++++++++ .../metrics/frame_processor_metrics.py | 8 ++++++++ 4 files changed, 29 insertions(+) create mode 100644 changelog/3852.deprecated.md diff --git a/changelog/3852.deprecated.md b/changelog/3852.deprecated.md new file mode 100644 index 000000000..666c7c58a --- /dev/null +++ b/changelog/3852.deprecated.md @@ -0,0 +1 @@ +- Deprecated `ProcessingMetricsData` and `start_processing_metrics()`/`stop_processing_metrics()` on `FrameProcessor` and `FrameProcessorMetrics`. These metrics don't accurately depict a service's performance. Instead, TTFB metrics are recommended. Processing metrics will be removed in the 1.0.0 version. diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index 2030306e5..37ab99447 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -41,6 +41,10 @@ class TTFBMetricsData(MetricsData): class ProcessingMetricsData(MetricsData): """General processing time metrics data. + .. deprecated:: 0.0.104 + Processing metrics are deprecated and will be removed in a future version. + Use TTFB metrics instead. + Parameters: value: Processing time measurement in seconds. """ diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index baa52cc70..3e90968fe 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -441,19 +441,35 @@ class FrameProcessor(BaseObject): if frame: await self.push_frame(frame) + _processing_metrics_warned = False + async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start processing metrics collection. + .. 
deprecated:: 0.0.104 + Processing metrics are deprecated and will be removed in a future version. + Use TTFB metrics instead. + Args: start_time: Optional timestamp to use as the start time. If None, uses the current time. """ if self.can_generate_metrics() and self.metrics_enabled: + if not FrameProcessor._processing_metrics_warned: + FrameProcessor._processing_metrics_warned = True + logger.warning( + "Processing metrics are deprecated and will be removed in a future version. " + "Use TTFB metrics instead." + ) await self._metrics.start_processing_metrics(start_time=start_time) async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop processing metrics collection and push results. + .. deprecated:: 0.0.104 + Processing metrics are deprecated and will be removed in a future version. + Use TTFB metrics instead. + Args: end_time: Optional timestamp to use as the end time. If None, uses the current time. diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index 7a52895a2..ef637b5ad 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -150,6 +150,10 @@ class FrameProcessorMetrics(BaseObject): async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start measuring processing time. + .. deprecated:: 0.0.104 + Processing metrics are deprecated and will be removed in a future version. + Use TTFB metrics instead. + Args: start_time: Optional timestamp to use as the start time. If None, uses the current time. @@ -159,6 +163,10 @@ class FrameProcessorMetrics(BaseObject): async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop processing time measurement and generate metrics frame. + .. deprecated:: 0.0.104 + Processing metrics are deprecated and will be removed in a future version. + Use TTFB metrics instead. 
+ Args: end_time: Optional timestamp to use as the end time. If None, uses the current time. From deba2515f9706b2d37e7dfeb0f547eb0dc9d2041 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 11:56:36 -0300 Subject: [PATCH 126/189] Added a new LLMAssistantPushAggregationFrame control frame that signals LLMAssistantAggregator to immediately flush its text buffer to the conversation context --- src/pipecat/frames/frames.py | 10 ++++++++++ .../processors/aggregators/llm_response_universal.py | 3 +++ 2 files changed, 13 insertions(+) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 55ae975d1..bbc065969 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1990,6 +1990,16 @@ class LLMFullResponseEndFrame(ControlFrame): self.skip_tts = None +@dataclass +class LLMAssistantPushAggregationFrame(ControlFrame): + """Frame that forces the LLM assistant aggregator to push its current aggregation to context. + + When received by ``LLMAssistantAggregator``, any text that has been accumulated + in the aggregation buffer is immediately committed to the conversation context as + an assistant message, without waiting for an ``LLMFullResponseEndFrame``. + """ + + @dataclass class LLMContextSummaryRequestFrame(ControlFrame): """Frame requesting context summarization from an LLM service. 
diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 4a28b38d5..b255748e0 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -35,6 +35,7 @@ from pipecat.frames.frames import ( InputAudioRawFrame, InterimTranscriptionFrame, InterruptionFrame, + LLMAssistantPushAggregationFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMContextSummaryRequestFrame, @@ -879,6 +880,8 @@ class LLMAssistantAggregator(LLMContextAggregator): elif isinstance(frame, (EndFrame, CancelFrame)): await self._handle_end_or_cancel(frame) await self.push_frame(frame, direction) + elif isinstance(frame, LLMAssistantPushAggregationFrame): + await self.push_aggregation() elif isinstance(frame, LLMFullResponseStartFrame): await self._handle_llm_start(frame) elif isinstance(frame, LLMFullResponseEndFrame): From bc6f8e51de242a18743af90126b9ac39c272c86c Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 11:56:44 -0300 Subject: [PATCH 127/189] Fixed TTSSpeakFrame not automatically committing spoken text to the conversation context when used outside of an LLM response (e.g., for bot greeting messages or injected speech) --- src/pipecat/services/tts_service.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index c6d2672d6..4285e14f9 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -39,6 +39,7 @@ from pipecat.frames.frames import ( Frame, InterimTranscriptionFrame, InterruptionFrame, + LLMAssistantPushAggregationFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, StartFrame, @@ -67,10 +68,16 @@ class TTSContext: """Context information for a TTS request. 
Attributes: - append_to_context: Whether this TTS output should be appended to the conversation context. + append_to_context: Whether this TTS output should be appended to the + conversation context after it is spoken. + push_assistant_aggregation: Whether to push an + ``LLMAssistantPushAggregationFrame`` after the TTS has finished + speaking, forcing the assistant aggregator to commit its current + text buffer to the conversation context. """ append_to_context: bool = True + push_assistant_aggregation: Optional[bool] = False class TextAggregationMode(str, Enum): @@ -641,10 +648,13 @@ class TTSService(AIService): elif isinstance(frame, TTSSpeakFrame): # Store if we were processing text or not so we can set it back. processing_text = self._processing_text + # If we are not receiving text from the LLM, we can assume that the SpeakFrame should be automatically added to the context + push_assistant_aggregation = frame.append_to_context and not self._llm_response_started # Assumption: text in TTSSpeakFrame does not include inter-frame spaces await self._push_tts_frames( AggregatedTextFrame(frame.text, AggregationType.SENTENCE), append_tts_text_to_context=frame.append_to_context, + push_assistant_aggregation=push_assistant_aggregation, ) # We pause processing incoming frames because we are sending data to # the TTS. We pause to avoid audio overlapping. 
@@ -809,6 +819,7 @@ class TTSService(AIService): src_frame: AggregatedTextFrame, includes_inter_frame_spaces: Optional[bool] = False, append_tts_text_to_context: Optional[bool] = True, + push_assistant_aggregation: Optional[bool] = False, ): type = src_frame.aggregated_by text = src_frame.text @@ -876,7 +887,8 @@ class TTSService(AIService): self._tts_contexts[context_id] = TTSContext( append_to_context=append_tts_text_to_context if append_tts_text_to_context is not None - else True + else True, + push_assistant_aggregation=push_assistant_aggregation, ) # Apply any final text preparation (e.g., trailing space) @@ -905,6 +917,8 @@ class TTSService(AIService): if append_tts_text_to_context is not None: frame.append_to_context = append_tts_text_to_context await self.push_frame(frame) + if push_assistant_aggregation: + await self.push_frame(LLMAssistantPushAggregationFrame()) async def _stop_frame_handler(self): has_started = False @@ -988,6 +1002,9 @@ class TTSService(AIService): frame = TTSStoppedFrame() frame.pts = last_pts frame.context_id = context_id + if context_id in self._tts_contexts: + if self._tts_contexts[context_id].push_assistant_aggregation: + await self.push_frame(LLMAssistantPushAggregationFrame()) else: # Assumption: word-by-word text frames don't include spaces, so # we can rely on the default includes_inter_frame_spaces=False From 1f45e80f9d6681004865caab9665fb5a5e37fdee Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 11:56:52 -0300 Subject: [PATCH 128/189] Updated the 52-live-translation.py example to demonstrate the fix --- examples/foundational/52-live-translation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/foundational/52-live-translation.py b/examples/foundational/52-live-translation.py index 30583c1b8..861d23e37 100644 --- a/examples/foundational/52-live-translation.py +++ b/examples/foundational/52-live-translation.py @@ -11,6 +11,7 @@ from dotenv import load_dotenv from loguru import logger from 
pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import TTSSpeakFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -110,6 +111,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + await task.queue_frames( + [ + TTSSpeakFrame( + text="Hello, welcome to live translation. Everything you say will be automatically translated to Spanish. Let's begin!", + append_to_context=True, + ), + ] + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): From d701c3427c968c963670e8b9dc27c5fc24abb838 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 11:57:03 -0300 Subject: [PATCH 129/189] Changelog entry for the TTSSpeakFrame fix. --- changelog/3845.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3845.fixed.md diff --git a/changelog/3845.fixed.md b/changelog/3845.fixed.md new file mode 100644 index 000000000..423853700 --- /dev/null +++ b/changelog/3845.fixed.md @@ -0,0 +1 @@ +- Fixed `TTSSpeakFrame` not committing spoken text to the conversation context when used outside of an LLM response (e.g., bot greetings or injected speech). \ No newline at end of file From 3b427a47b64685a8f48b19ed80d101d2f57f0e3d Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 11:57:11 -0300 Subject: [PATCH 130/189] Fixing Piper test. 
--- tests/test_piper_tts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_piper_tts.py b/tests/test_piper_tts.py index 0ce14bd85..662b9a40c 100644 --- a/tests/test_piper_tts.py +++ b/tests/test_piper_tts.py @@ -125,7 +125,7 @@ async def test_run_piper_tts_error(aiohttp_client): ) frames_to_send = [ - TTSSpeakFrame(text="Error case."), + TTSSpeakFrame(text="Error case.", append_to_context=False), ] expected_down_frames = [AggregatedTextFrame, TTSStoppedFrame, TTSTextFrame] From 3a32d91c66adbb3b5ea09b70f5f7a7bfbab299c1 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 10:21:10 -0500 Subject: [PATCH 131/189] Set finalized flag on ElevenLabs Realtime STT transcriptions for manual commit strategy --- src/pipecat/services/elevenlabs/stt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 5422fb193..0cf13121e 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -861,6 +861,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._handle_transcription(text, True, language) + finalized = self._settings.commit_strategy == CommitStrategy.MANUAL + await self.push_frame( TranscriptionFrame( text, @@ -868,6 +870,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): time_now_iso8601(), language, result=data, + finalized=finalized, ) ) @@ -902,6 +905,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._handle_transcription(text, True, language) + finalized = self._settings.commit_strategy == CommitStrategy.MANUAL + # This message is sent after committed_transcript when include_timestamps=true. # It contains the full transcript data including text and word-level timestamps. 
await self.push_frame( @@ -911,5 +916,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): time_now_iso8601(), language, result=data, + finalized=finalized, ) ) From 601822e3e5fda2aa6f9de79dda646f88266589c5 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 10:25:48 -0500 Subject: [PATCH 132/189] Add changelog for PR #3865 --- changelog/3865.changed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3865.changed.md diff --git a/changelog/3865.changed.md b/changelog/3865.changed.md new file mode 100644 index 000000000..7a70eb0d7 --- /dev/null +++ b/changelog/3865.changed.md @@ -0,0 +1 @@ +- `ElevenLabsRealtimeSTTService` now sets `TranscriptionFrame.finalized` to `True` when using `CommitStrategy.MANUAL`. From 41d6470e4aa137ee1fab2a106a5926f387497ba4 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 10:39:37 -0500 Subject: [PATCH 133/189] Fix docs workflow: add auto-docs label, remove version info --- .github/workflows/update-docs.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml index 03323e87f..27453e74e 100644 --- a/.github/workflows/update-docs.yml +++ b/.github/workflows/update-docs.yml @@ -44,12 +44,6 @@ jobs: token: ${{ secrets.DOCS_SYNC_TOKEN }} path: _docs - - name: Get version info - id: version - run: | - echo "release=$(git describe --tags --abbrev=0 2>/dev/null || echo 'unknown')" >> "$GITHUB_OUTPUT" - echo "dev=$(git describe --tags 2>/dev/null || echo 'unknown')" >> "$GITHUB_OUTPUT" - - name: Resolve PR number id: pr run: | @@ -74,7 +68,6 @@ jobs: 1. Read the skill instructions at `.claude/skills/update-docs/SKILL.md` 2. Read the source-to-doc mapping at `.claude/skills/update-docs/SOURCE_DOC_MAPPING.md` 3. The docs repository is checked out at `./_docs/` - 4. 
Current pipecat version: ${{ steps.version.outputs.release }} (release), ${{ steps.version.outputs.dev }} (dev) ## Get the diff @@ -119,12 +112,11 @@ jobs: git push -u origin docs/pr-${{ steps.pr.outputs.number }} GH_TOKEN=$DOCS_SYNC_TOKEN gh pr create \ --repo pipecat-ai/docs \ + --label auto-docs \ --title "docs: update for pipecat PR #${{ steps.pr.outputs.number }}" \ --body "$(cat <<'BODY' Automated documentation update for [pipecat PR #${{ steps.pr.outputs.number }}](https://github.com/pipecat-ai/pipecat/pull/${{ steps.pr.outputs.number }}). - Pipecat version: ${{ steps.version.outputs.release }} (${{ steps.version.outputs.dev }}) - ## Changes From aa6d3b38b38793f948c1aa8ac5fb3b1b8d644e5d Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 12:53:25 -0300 Subject: [PATCH 134/189] Add explanatory comments for LLMSpecificMessage guards in context summarization, and fixed the missing guard in LLMContextSummarizer._apply_summary when searching for the first system message. --- .../aggregators/llm_context_summarizer.py | 16 +++++++++++++--- .../utils/context/llm_context_summarization.py | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index a1a613ccc..7886fcf12 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -18,7 +18,7 @@ from pipecat.frames.frames import ( LLMContextSummaryResultFrame, LLMFullResponseStartFrame, ) -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject from pipecat.utils.context.llm_context_summarization import ( @@ -290,8 +290,18 @@ class LLMContextSummarizer(BaseObject): """ 
messages = self._context.messages - # Find the first system message to preserve - first_system_msg = next((msg for msg in messages if msg.get("role") == "system"), None) + # Find the first system message to preserve. LLMSpecificMessage instances are excluded + # because they are not dict-like and never represent a system message; they hold + # service-specific metadata (e.g. thinking blocks) that is always paired with a + # standard message. + first_system_msg = next( + ( + msg + for msg in messages + if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" + ), + None, + ) # Get recent messages to keep recent_messages = messages[last_summarized_index + 1 :] diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 06551e3bb..537cc91ab 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -188,6 +188,8 @@ class LLMContextSummarizationUtil: total = 0 for message in context.messages: + # LLMSpecificMessage holds service-specific data (e.g. thinking blocks, + # thought signatures). Skipping them here for now. if isinstance(message, LLMSpecificMessage): continue @@ -251,6 +253,9 @@ class LLMContextSummarizationUtil: for i in range(start_idx, len(messages)): msg = messages[i] + # LLMSpecificMessage instances (e.g. thinking blocks) never carry tool_call or + # tool_call_id fields, so they cannot affect the pending-call tracking. Skipping + # them avoids an AttributeError. if isinstance(msg, LLMSpecificMessage): continue @@ -302,7 +307,10 @@ class LLMContextSummarizationUtil: if len(messages) <= min_messages_to_keep: return LLMMessagesToSummarize(messages=[], last_summarized_index=-1) - # Find first system message index + # Find first system message index. 
LLMSpecificMessage instances are excluded because + # they are not dict-like and never represent a system message; they hold + # service-specific metadata (e.g. thinking blocks) that is always paired with a + # standard message. first_system_index = next( ( i @@ -367,6 +375,11 @@ class LLMContextSummarizationUtil: transcript_parts = [] for msg in messages: + # LLMSpecificMessage holds service-specific internal data (e.g. Anthropic thinking + # blocks, Gemini thought signatures). This data is not meaningful as plain text for + # a summarization transcript, and the summarizer LLM would not know how to interpret + # it. The conversational content of those turns is already captured by the + # accompanying standard assistant message. if isinstance(msg, LLMSpecificMessage): continue From 790c434a0833939e950b19ac37c5c7e61835bf89 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 20:58:06 -0500 Subject: [PATCH 135/189] Update summary message role: use user instead of assistant The context summary is information provided to the assistant, not something the assistant said. 
--- src/pipecat/processors/aggregators/llm_context_summarizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 7886fcf12..4a32d5397 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -306,8 +306,8 @@ class LLMContextSummarizer(BaseObject): # Get recent messages to keep recent_messages = messages[last_summarized_index + 1 :] - # Create summary message as an assistant message - summary_message = {"role": "assistant", "content": f"Conversation summary: {summary}"} + # Create summary message as an user message + summary_message = {"role": "user", "content": f"Conversation summary: {summary}"} # Reconstruct context new_messages = [] From 945a523eed66536d1ae1330be16e9f710029aa75 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 21:03:05 -0500 Subject: [PATCH 136/189] Add configurable summary_message_template to LLMContextSummarizationConfig Allows applications to customize how the summary is wrapped when injected into context (e.g., XML tags, custom delimiters) so system prompts can distinguish summaries from live conversation. 
--- .../processors/aggregators/llm_context_summarizer.py | 6 ++++-- src/pipecat/utils/context/llm_context_summarization.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 4a32d5397..a3d65b894 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -306,8 +306,10 @@ class LLMContextSummarizer(BaseObject): # Get recent messages to keep recent_messages = messages[last_summarized_index + 1 :] - # Create summary message as an user message - summary_message = {"role": "user", "content": f"Conversation summary: {summary}"} + # Create summary message as a user message (the summary is context + # provided *to* the assistant, not something the assistant said) + summary_content = self._config.summary_message_template.format(summary=summary) + summary_message = {"role": "user", "content": summary_content} # Reconstruct context new_messages = [] diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 537cc91ab..7cb07a00c 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -73,6 +73,11 @@ class LLMContextSummarizationConfig: immediate conversational context. summarization_prompt: Custom prompt for the LLM to use when generating summaries. If None, uses DEFAULT_SUMMARIZATION_PROMPT. + summary_message_template: Template for formatting the summary when + injected into context. Must contain ``{summary}`` as a placeholder + for the generated summary text. Allows applications to wrap the + summary in custom delimiters (e.g., XML tags) so that system + prompts can distinguish summaries from live conversation. 
""" max_context_tokens: int = 8000 @@ -80,6 +85,7 @@ class LLMContextSummarizationConfig: max_unsummarized_messages: int = 20 min_messages_after_summary: int = 4 summarization_prompt: Optional[str] = None + summary_message_template: str = "Conversation summary: {summary}" def __post_init__(self): """Validate configuration parameters.""" From a489bfaf00685eb304bc330e1835070dd9377f2b Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 21:22:52 -0500 Subject: [PATCH 137/189] Add optional dedicated LLM for context summarization Adds an field to LLMContextSummarizationConfig that allows routing summarization to a separate LLM service (e.g., Gemini Flash) instead of the pipeline's primary model. This avoids paying for expensive inference when compressing context in long-running sessions. --- .../aggregators/llm_response_universal.py | 53 +++++++++++++++++-- .../context/llm_context_summarization.py | 11 +++- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index b255748e0..217d930e7 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -16,7 +16,7 @@ import json import warnings from abc import abstractmethod from dataclasses import dataclass, field -from typing import Any, Dict, List, Literal, Optional, Set, Type +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set, Type from loguru import logger @@ -39,6 +39,7 @@ from pipecat.frames.frames import ( LLMContextAssistantTimestampFrame, LLMContextFrame, LLMContextSummaryRequestFrame, + LLMContextSummaryResultFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMMessagesAppendFrame, @@ -83,6 +84,9 @@ from pipecat.utils.context.llm_context_summarization import LLMContextSummarizat from pipecat.utils.string import TextPartForConcatenation, 
concatenate_aggregated_text from pipecat.utils.time import time_now_iso8601 +if TYPE_CHECKING: + from pipecat.services.llm_service import LLMService + @dataclass class LLMUserAggregatorParams: @@ -1248,13 +1252,56 @@ class LLMAssistantAggregator(LLMContextAggregator): ): """Handle summarization request from the summarizer. - Push the request frame UPSTREAM to the LLM service for processing. + If a dedicated summarization LLM is configured, generates the summary + directly and feeds the result to the summarizer. Otherwise, pushes the + request frame upstream to the pipeline's primary LLM service. Args: summarizer: The summarizer that generated the request. frame: The summarization request frame to broadcast. """ - await self.push_frame(frame, FrameDirection.UPSTREAM) + summarization_llm = ( + self._params.context_summarization_config.llm + if self._params.context_summarization_config + else None + ) + + if summarization_llm: + self.create_task(self._generate_summary_with_dedicated_llm(summarization_llm, frame)) + else: + await self.push_frame(frame, FrameDirection.UPSTREAM) + + async def _generate_summary_with_dedicated_llm( + self, llm: "LLMService", frame: LLMContextSummaryRequestFrame + ): + """Generate summary using a dedicated LLM service. + + Calls the dedicated LLM's _generate_summary directly and feeds the + result back to the summarizer, bypassing the pipeline. + + Args: + llm: The dedicated LLM service to use for summarization. + frame: The summarization request frame. 
+ """ + try: + summary, last_index = await llm._generate_summary(frame) + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary=summary, + last_summarized_index=last_index, + ) + except Exception as e: + error = f"Error generating context summary: {e}" + await self.push_error(error, exception=e) + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=f"Error generating context summary: {e}", + ) + + if self._summarizer: + await self._summarizer.process_frame(result_frame) class LLMContextAggregatorPair: diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 7cb07a00c..2dcd28fce 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -11,7 +11,10 @@ context when token limits are reached, enabling efficient long-running conversat """ from dataclasses import dataclass -from typing import List, Optional +from typing import TYPE_CHECKING, List, Optional + +if TYPE_CHECKING: + from pipecat.services.llm_service import LLMService from loguru import logger @@ -78,6 +81,11 @@ class LLMContextSummarizationConfig: for the generated summary text. Allows applications to wrap the summary in custom delimiters (e.g., XML tags) so that system prompts can distinguish summaries from live conversation. + llm: Optional separate LLM service for generating summaries. When set, + summarization requests are sent to this service instead of the + pipeline's primary LLM. Useful for routing summarization to a + cheaper/faster model (e.g., Gemini Flash) while keeping an + expensive model for conversation. If None, uses the pipeline LLM. 
""" max_context_tokens: int = 8000 @@ -86,6 +94,7 @@ class LLMContextSummarizationConfig: min_messages_after_summary: int = 4 summarization_prompt: Optional[str] = None summary_message_template: str = "Conversation summary: {summary}" + llm: Optional["LLMService"] = None def __post_init__(self): """Validate configuration parameters.""" From 50710e9c3f7c07d4906a32faab9ca57930f6f20e Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 21:31:04 -0500 Subject: [PATCH 138/189] Add summarization timeout to prevent hung LLM calls Adds a configurable summarization_timeout (default 120s) that cancels summary generation if the LLM hangs. On timeout, an error result is returned so _summarization_in_progress resets and future summarizations are unblocked. --- src/pipecat/frames/frames.py | 3 +++ .../aggregators/llm_context_summarizer.py | 1 + .../aggregators/llm_response_universal.py | 25 +++++++++++++++---- src/pipecat/services/llm_service.py | 12 ++++++++- .../context/llm_context_summarization.py | 5 ++++ 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index bbc065969..fbb0294a3 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2019,6 +2019,8 @@ class LLMContextSummaryRequestFrame(ControlFrame): the summary text. summarization_prompt: System prompt instructing the LLM how to generate the summary. + summarization_timeout: Maximum time in seconds for the LLM to generate a + summary. None means no timeout. 
""" request_id: str @@ -2026,6 +2028,7 @@ class LLMContextSummaryRequestFrame(ControlFrame): min_messages_to_keep: int target_context_tokens: int summarization_prompt: str + summarization_timeout: Optional[float] = None @dataclass diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index a3d65b894..2618b558b 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -218,6 +218,7 @@ class LLMContextSummarizer(BaseObject): min_messages_to_keep=min_keep, target_context_tokens=self._config.target_context_tokens, summarization_prompt=self._config.summary_prompt, + summarization_timeout=self._config.summarization_timeout, ) # Emit event for aggregator to broadcast diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 217d930e7..361b2c8a6 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -1284,20 +1284,35 @@ class LLMAssistantAggregator(LLMContextAggregator): frame: The summarization request frame. 
""" try: - summary, last_index = await llm._generate_summary(frame) + if frame.summarization_timeout: + summary, last_index = await asyncio.wait_for( + llm._generate_summary(frame), + timeout=frame.summarization_timeout, + ) + else: + summary, last_index = await llm._generate_summary(frame) result_frame = LLMContextSummaryResultFrame( request_id=frame.request_id, summary=summary, last_summarized_index=last_index, ) - except Exception as e: - error = f"Error generating context summary: {e}" - await self.push_error(error, exception=e) + except asyncio.TimeoutError: + error = f"Context summarization timed out after {frame.summarization_timeout}s" + logger.error(f"{self}: {error}") result_frame = LLMContextSummaryResultFrame( request_id=frame.request_id, summary="", last_summarized_index=-1, - error=f"Error generating context summary: {e}", + error=error, + ) + except Exception as e: + error = f"Error generating context summary: {e}" + await self.push_error(error_msg=error, exception=e) + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=error, ) if self._summarizer: diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index a06423754..86048ccbe 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -437,7 +437,17 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): error = None try: - summary, last_index = await self._generate_summary(frame) + if frame.summarization_timeout: + summary, last_index = await asyncio.wait_for( + self._generate_summary(frame), + timeout=frame.summarization_timeout, + ) + else: + summary, last_index = await self._generate_summary(frame) + except asyncio.TimeoutError: + await self.push_error( + error_msg=f"Context summarization timed out after {frame.summarization_timeout}s" + ) except Exception as e: error = f"Error generating context summary: {e}" await self.push_error(error, 
exception=e) diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 2dcd28fce..00dd74fd8 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -86,6 +86,10 @@ class LLMContextSummarizationConfig: pipeline's primary LLM. Useful for routing summarization to a cheaper/faster model (e.g., Gemini Flash) while keeping an expensive model for conversation. If None, uses the pipeline LLM. + summarization_timeout: Maximum time in seconds to wait for the LLM to + generate a summary. If the call exceeds this timeout, summarization + is aborted with an error and future summarizations are unblocked. + Set to None to disable the timeout. """ max_context_tokens: int = 8000 @@ -95,6 +99,7 @@ class LLMContextSummarizationConfig: summarization_prompt: Optional[str] = None summary_message_template: str = "Conversation summary: {summary}" llm: Optional["LLMService"] = None + summarization_timeout: Optional[float] = 120.0 def __post_init__(self): """Validate configuration parameters.""" From be8ea818c877a2c4319c65749ae719ff876b895b Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 21:34:01 -0500 Subject: [PATCH 139/189] Add on_summary_applied event for observability Emits a SummaryAppliedEvent after context summarization completes, providing message counts so applications can track compression metrics. 
--- .../aggregators/llm_context_summarizer.py | 47 ++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 2618b558b..85da9bfa0 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -7,6 +7,7 @@ """This module defines a summarizer for managing LLM context summarization.""" import uuid +from dataclasses import dataclass from typing import Optional from loguru import logger @@ -27,6 +28,25 @@ from pipecat.utils.context.llm_context_summarization import ( ) +@dataclass +class SummaryAppliedEvent: + """Event data emitted when context summarization completes successfully. + + Parameters: + original_message_count: Number of messages before summarization. + new_message_count: Number of messages after summarization. + summarized_message_count: Number of messages that were compressed + into the summary. + preserved_message_count: Number of recent messages preserved + uncompressed. + """ + + original_message_count: int + new_message_count: int + summarized_message_count: int + preserved_message_count: int + + class LLMContextSummarizer(BaseObject): """Summarizer for managing LLM context summarization. @@ -39,6 +59,10 @@ class LLMContextSummarizer(BaseObject): - on_request_summarization: Emitted when summarization should be triggered. The aggregator should broadcast this frame to the LLM service. + - on_summary_applied: Emitted after a summary has been successfully applied + to the context. Receives a SummaryAppliedEvent with metrics about the + compression. + Example:: @summarizer.event_handler("on_request_summarization") @@ -49,6 +73,10 @@ class LLMContextSummarizer(BaseObject): context=frame.context, ... 
) + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info(f"Compressed {event.original_message_count} -> {event.new_message_count} messages") """ def __init__( @@ -74,6 +102,7 @@ class LLMContextSummarizer(BaseObject): self._pending_summary_request_id: Optional[str] = None self._register_event_handler("on_request_summarization", sync=True) + self._register_event_handler("on_summary_applied") @property def task_manager(self) -> BaseTaskManager: @@ -320,9 +349,23 @@ class LLMContextSummarizer(BaseObject): new_messages.extend(recent_messages) # Update context + original_message_count = len(messages) + num_system_preserved = 1 if first_system_msg else 0 self._context.set_messages(new_messages) + # Messages actually summarized = index range minus the preserved system message + summarized_count = last_summarized_index + 1 - num_system_preserved + logger.info( - f"{self}: Applied context summary, compressed {last_summarized_index + 1} messages " - f"into summary. Context now has {len(new_messages)} messages (was {len(messages)})" + f"{self}: Applied context summary, compressed {summarized_count} messages " + f"into summary. 
Context now has {len(new_messages)} messages (was {original_message_count})" ) + + # Emit event for observability + event = SummaryAppliedEvent( + original_message_count=original_message_count, + new_message_count=len(new_messages), + summarized_message_count=summarized_count, + preserved_message_count=len(recent_messages) + num_system_preserved, + ) + await self._call_event_handler("on_summary_applied", event) From 712305c5b14fbebd22fc6368309f4f7f13a9373d Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 21:45:22 -0500 Subject: [PATCH 140/189] Add example 54c showing custom context summarization --- .../54-context-summarization-openai.py | 23 +- .../54a-context-summarization-google.py | 23 +- ...54c-context-summarization-dedicated-llm.py | 231 ++++++++++++++++++ 3 files changed, 261 insertions(+), 16 deletions(-) create mode 100644 examples/foundational/54c-context-summarization-dedicated-llm.py diff --git a/examples/foundational/54-context-summarization-openai.py b/examples/foundational/54-context-summarization-openai.py index 652a3af13..45f27854f 100644 --- a/examples/foundational/54-context-summarization-openai.py +++ b/examples/foundational/54-context-summarization-openai.py @@ -20,14 +20,13 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent from 
pipecat.processors.aggregators.llm_response_universal import ( LLMAssistantAggregatorParams, LLMContextAggregatorPair, @@ -42,8 +41,6 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy -from pipecat.turns.user_turn_strategies import UserTurnStrategies from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig load_dotenv(override=True) @@ -120,10 +117,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] - ), - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( enable_context_summarization=True, @@ -138,6 +132,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ), ) + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + pipeline = Pipeline( [ transport.input(), # Transport user input diff --git a/examples/foundational/54a-context-summarization-google.py b/examples/foundational/54a-context-summarization-google.py index a7fe4ba5e..2ce29e959 100644 --- 
a/examples/foundational/54a-context-summarization-google.py +++ b/examples/foundational/54a-context-summarization-google.py @@ -20,14 +20,13 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent from pipecat.processors.aggregators.llm_response_universal import ( LLMAssistantAggregatorParams, LLMContextAggregatorPair, @@ -42,8 +41,6 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy -from pipecat.turns.user_turn_strategies import UserTurnStrategies from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig load_dotenv(override=True) @@ -120,10 +117,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] - ), - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + vad_analyzer=SileroVADAnalyzer(), ), 
assistant_params=LLMAssistantAggregatorParams( enable_context_summarization=True, @@ -138,6 +132,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ), ) + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + pipeline = Pipeline( [ transport.input(), # Transport user input diff --git a/examples/foundational/54c-context-summarization-dedicated-llm.py b/examples/foundational/54c-context-summarization-dedicated-llm.py new file mode 100644 index 000000000..3b2195e80 --- /dev/null +++ b/examples/foundational/54c-context-summarization-dedicated-llm.py @@ -0,0 +1,231 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Example demonstrating advanced context summarization configuration. 
+ +This example shows how to customize context summarization with: +- A dedicated cheap/fast LLM for generating summaries (Gemini Flash) +- A custom summary message template (XML tags) +- A custom summarization prompt +- A summarization timeout +- The on_summary_applied event for observability +""" + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent +from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google import GoogleLLMService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig + +load_dotenv(override=True) + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + +# Custom summarization prompt tailored to the application +CUSTOM_SUMMARIZATION_PROMPT = """Summarize this conversation, preserving: +- Key decisions and agreements +- Important facts and user preferences +- Any pending action items or unresolved questions + +Be concise. Use clear, factual statements grouped by topic. +Omit greetings, small talk, and resolved tangents.""" + + +# Tool functions for the LLM +async def get_current_weather(params: FunctionCallParams): + """Get the current weather.""" + logger.info("Tool called: get_current_weather") + await asyncio.sleep(1) # Simulate some processing + await params.result_callback({"conditions": "nice", "temperature": "75"}) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + # Primary LLM for conversation (could be any provider) + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + # Dedicated cheap/fast LLM for summarization only + summarization_llm = GoogleLLMService( + api_key=os.getenv("GOOGLE_API_KEY"), + model="gemini-2.5-flash", + ) + + # Register tool functions + llm.register_function("get_current_weather", get_current_weather) + + weather_function = FunctionSchema( + name="get_current_weather", + description="Get the current weather", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the user's location.", + }, + }, + required=["location", "format"], + ) + tools = ToolsSchema(standard_tools=[weather_function]) + + messages = [ + { + "role": "system", + "content": ( + "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate " + "your capabilities in a succinct way. Your output will be spoken aloud, " + "so avoid special characters that can't easily be spoken. Respond to what " + "the user said in a creative and helpful way. You have access to tools to " + "get the current weather - use them when relevant.\n\n" + "When you see a block, it contains a compressed summary " + "of earlier conversation. Use it as reference but don't mention it to the user." + ), + }, + ] + + context = LLMContext(messages, tools=tools) + + # Create aggregators with custom summarization + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + vad_analyzer=SileroVADAnalyzer(), + ), + assistant_params=LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=LLMContextSummarizationConfig( + # Trigger thresholds (low values to demonstrate quickly) + max_context_tokens=1000, + max_unsummarized_messages=10, + # Summary generation + target_context_tokens=800, + min_messages_after_summary=2, + summarization_prompt=CUSTOM_SUMMARIZATION_PROMPT, + # Custom summary format - wrap in XML tags so the system + # prompt can identify summaries vs. 
live conversation + summary_message_template="\n{summary}\n", + # Use a dedicated cheap LLM for summarization instead of + # the primary conversation model + llm=summarization_llm, + # Cancel summarization if it takes longer than 60 seconds + summarization_timeout=60.0, + ), + ), + ) + + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + user_aggregator, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + assistant_aggregator, # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + # Kick off the conversation. 
+ messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() From ec9ddb31993e05ef0dcc64d962487480a534b941 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 22:09:26 -0500 Subject: [PATCH 141/189] Add changelog entries for context summarization improvements (#3855) --- changelog/3855.added.2.md | 1 + changelog/3855.added.3.md | 1 + changelog/3855.added.4.md | 1 + changelog/3855.added.md | 1 + changelog/3855.changed.md | 1 + 5 files changed, 5 insertions(+) create mode 100644 changelog/3855.added.2.md create mode 100644 changelog/3855.added.3.md create mode 100644 changelog/3855.added.4.md create mode 100644 changelog/3855.added.md create mode 100644 changelog/3855.changed.md diff --git a/changelog/3855.added.2.md b/changelog/3855.added.2.md new file mode 100644 index 000000000..01cd23efe --- /dev/null +++ b/changelog/3855.added.2.md @@ -0,0 +1 @@ +- Added optional `llm` field to `LLMContextSummarizationConfig` for routing summarization to a dedicated LLM service (e.g., a cheaper/faster model) instead of the pipeline's primary model. 
diff --git a/changelog/3855.added.3.md b/changelog/3855.added.3.md new file mode 100644 index 000000000..b93fdec60 --- /dev/null +++ b/changelog/3855.added.3.md @@ -0,0 +1 @@ +- Added `summarization_timeout` to `LLMContextSummarizationConfig` (default 120s) to prevent hung LLM calls from permanently blocking future summarizations. diff --git a/changelog/3855.added.4.md b/changelog/3855.added.4.md new file mode 100644 index 000000000..b712b4ac9 --- /dev/null +++ b/changelog/3855.added.4.md @@ -0,0 +1 @@ +- Added `on_summary_applied` event to `LLMContextSummarizer` for observability, providing message counts before and after context summarization. diff --git a/changelog/3855.added.md b/changelog/3855.added.md new file mode 100644 index 000000000..79d37eeba --- /dev/null +++ b/changelog/3855.added.md @@ -0,0 +1 @@ +- Added `summary_message_template` to `LLMContextSummarizationConfig` for customizing how summaries are formatted when injected into context (e.g., wrapping in XML tags). diff --git a/changelog/3855.changed.md b/changelog/3855.changed.md new file mode 100644 index 000000000..2eac6785a --- /dev/null +++ b/changelog/3855.changed.md @@ -0,0 +1 @@ +- Updated context summarization to use `user` role instead of `assistant` for summary messages. From 98e737b4e94b2148f85193a8ddd9bd51deadc9bb Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Feb 2026 22:56:10 -0500 Subject: [PATCH 142/189] Add tests for context summarization improvements Cover summary message role, template, on_summary_applied event, summarization timeout, and dedicated LLM routing/error handling. 
--- tests/test_context_summarization.py | 300 ++++++++++++++++++++++++++- tests/test_llm_context_summarizer.py | 251 +++++++++++++++++++++- 2 files changed, 548 insertions(+), 3 deletions(-) diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 3bb1246e9..446bfb8bd 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -6,10 +6,11 @@ """Tests for context summarization feature.""" +import asyncio import unittest -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock -from pipecat.frames.frames import LLMContextSummaryRequestFrame +from pipecat.frames.frames import LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.services.llm_service import LLMService from pipecat.utils.context.llm_context_summarization import ( @@ -601,6 +602,301 @@ class TestSummaryGenerationExceptions(unittest.IsolatedAsyncioTestCase): self.assertGreater(last_index, -1) self.assertEqual(last_index, 1) # Should be the index of the last summarized message + async def test_generate_summary_task_timeout(self): + """Test that _generate_summary_task handles timeout correctly.""" + llm_service = LLMService() + + # Mock _generate_summary to hang + async def slow_summary(frame): + await asyncio.sleep(10) + return ("summary", 1) + + llm_service._generate_summary = slow_summary + + broadcast_calls = [] + + async def mock_broadcast(frame_class, **kwargs): + broadcast_calls.append((frame_class, kwargs)) + + llm_service.broadcast_frame = mock_broadcast + llm_service.push_error = AsyncMock() + + context = LLMContext() + context.add_message({"role": "user", "content": "Message 1"}) + context.add_message({"role": "assistant", "content": "Response 1"}) + context.add_message({"role": "user", "content": "Message 2"}) + + frame = LLMContextSummaryRequestFrame( + request_id="timeout_test", + 
context=context, + min_messages_to_keep=1, + target_context_tokens=1000, + summarization_prompt="Summarize this", + summarization_timeout=0.1, # Very short timeout + ) + + await llm_service._generate_summary_task(frame) + + # Should have broadcast an error result + self.assertEqual(len(broadcast_calls), 1) + _, kwargs = broadcast_calls[0] + self.assertEqual(kwargs["request_id"], "timeout_test") + self.assertEqual(kwargs["summary"], "") + self.assertEqual(kwargs["last_summarized_index"], -1) + # error is None for timeout path (push_error is called instead) + self.assertIsNone(kwargs["error"]) + + # push_error should have been called with timeout message + llm_service.push_error.assert_called_once() + call_args = llm_service.push_error.call_args + error_msg = call_args.kwargs.get("error_msg") or call_args.args[0] + self.assertIn("timed out", error_msg) + + +class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): + """Tests for dedicated LLM summarization in LLMAssistantAggregator.""" + + def _create_context_and_frame(self): + """Create a context with enough messages and a matching request frame.""" + context = LLMContext() + context.add_message({"role": "user", "content": "Message 1"}) + context.add_message({"role": "assistant", "content": "Response 1"}) + context.add_message({"role": "user", "content": "Message 2"}) + + frame = LLMContextSummaryRequestFrame( + request_id="dedicated_test", + context=context, + min_messages_to_keep=1, + target_context_tokens=1000, + summarization_prompt="Summarize this", + summarization_timeout=5.0, + ) + return context, frame + + async def test_dedicated_llm_success(self): + """Test that dedicated LLM generates summary and feeds result to summarizer.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregator, + LLMAssistantAggregatorParams, + ) + from pipecat.utils.asyncio.task_manager import 
TaskManager, TaskManagerParams + + context, frame = self._create_context_and_frame() + + # Create a mock dedicated LLM + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock(return_value=("Dedicated summary", 1)) + + config = LLMContextSummarizationConfig( + max_context_tokens=50, + llm=dedicated_llm, + ) + params = LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=config, + ) + aggregator = LLMAssistantAggregator(context, params=params) + + # Mock summarizer.process_frame to capture the result + result_frames = [] + original_process = aggregator._summarizer.process_frame + + async def capture_process(frame): + result_frames.append(frame) + await original_process(frame) + + aggregator._summarizer.process_frame = capture_process + + # Call the method directly + await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + + # Verify the dedicated LLM was called + dedicated_llm._generate_summary.assert_called_once_with(frame) + + # Verify result was fed to the summarizer + self.assertEqual(len(result_frames), 1) + result = result_frames[0] + self.assertIsInstance(result, LLMContextSummaryResultFrame) + self.assertEqual(result.request_id, "dedicated_test") + self.assertEqual(result.summary, "Dedicated summary") + self.assertEqual(result.last_summarized_index, 1) + self.assertIsNone(result.error) + + async def test_dedicated_llm_timeout(self): + """Test that dedicated LLM timeout produces error result.""" + from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregator, + LLMAssistantAggregatorParams, + ) + + context, _ = self._create_context_and_frame() + + # Create a mock dedicated LLM that hangs + dedicated_llm = LLMService() + + async def slow_summary(frame): + await asyncio.sleep(10) + return ("summary", 1) + + dedicated_llm._generate_summary = slow_summary + + config = LLMContextSummarizationConfig( + max_context_tokens=50, + llm=dedicated_llm, + ) 
+ params = LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=config, + ) + aggregator = LLMAssistantAggregator(context, params=params) + + # Mock summarizer.process_frame to capture the result + result_frames = [] + + async def capture_process(frame): + result_frames.append(frame) + + aggregator._summarizer.process_frame = capture_process + + # Create frame with very short timeout + frame = LLMContextSummaryRequestFrame( + request_id="timeout_test", + context=context, + min_messages_to_keep=1, + target_context_tokens=1000, + summarization_prompt="Summarize this", + summarization_timeout=0.1, + ) + + await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + + # Verify error result was fed to summarizer + self.assertEqual(len(result_frames), 1) + result = result_frames[0] + self.assertIsInstance(result, LLMContextSummaryResultFrame) + self.assertEqual(result.request_id, "timeout_test") + self.assertEqual(result.summary, "") + self.assertEqual(result.last_summarized_index, -1) + self.assertIn("timed out", result.error) + + async def test_dedicated_llm_exception(self): + """Test that dedicated LLM exceptions produce error result.""" + from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregator, + LLMAssistantAggregatorParams, + ) + + context, frame = self._create_context_and_frame() + + # Create a mock dedicated LLM that raises + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock( + side_effect=RuntimeError("LLM connection failed") + ) + + config = LLMContextSummarizationConfig( + max_context_tokens=50, + llm=dedicated_llm, + ) + params = LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=config, + ) + aggregator = LLMAssistantAggregator(context, params=params) + aggregator.push_error = AsyncMock() + + # Mock summarizer.process_frame to capture the result + result_frames = [] + + async def 
capture_process(frame): + result_frames.append(frame) + + aggregator._summarizer.process_frame = capture_process + + await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + + # Verify error result was fed to summarizer + self.assertEqual(len(result_frames), 1) + result = result_frames[0] + self.assertIsInstance(result, LLMContextSummaryResultFrame) + self.assertEqual(result.request_id, "dedicated_test") + self.assertEqual(result.summary, "") + self.assertEqual(result.last_summarized_index, -1) + self.assertIn("LLM connection failed", result.error) + + # push_error should have been called + aggregator.push_error.assert_called_once() + + async def test_on_request_summarization_routes_to_dedicated_llm(self): + """Test that _on_request_summarization routes to dedicated LLM when configured.""" + from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregator, + LLMAssistantAggregatorParams, + ) + + context, frame = self._create_context_and_frame() + + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock(return_value=("Summary", 1)) + + config = LLMContextSummarizationConfig( + max_context_tokens=50, + llm=dedicated_llm, + ) + params = LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=config, + ) + aggregator = LLMAssistantAggregator(context, params=params) + aggregator.push_frame = AsyncMock() + + # Track what coroutine is passed to create_task + created_coros = [] + original_create_task = aggregator.create_task + + def mock_create_task(coro, *args, **kwargs): + created_coros.append(coro) + # Actually run the coroutine to avoid "never awaited" warning + task = asyncio.ensure_future(coro) + return task + + aggregator.create_task = mock_create_task + + await aggregator._on_request_summarization(aggregator._summarizer, frame) + + # Should NOT push frame upstream + aggregator.push_frame.assert_not_called() + + # Should have created a task for the 
dedicated LLM + self.assertEqual(len(created_coros), 1) + + # Wait for the task to complete + await asyncio.sleep(0.05) + + async def test_on_request_summarization_pushes_upstream_without_dedicated_llm(self): + """Test that _on_request_summarization pushes upstream when no dedicated LLM.""" + from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregator, + LLMAssistantAggregatorParams, + ) + from pipecat.processors.frame_processor import FrameDirection + + context, frame = self._create_context_and_frame() + + config = LLMContextSummarizationConfig(max_context_tokens=50) + params = LLMAssistantAggregatorParams( + enable_context_summarization=True, + context_summarization_config=config, + ) + aggregator = LLMAssistantAggregator(context, params=params) + aggregator.push_frame = AsyncMock() + + await aggregator._on_request_summarization(aggregator._summarizer, frame) + + # Should push frame upstream + aggregator.push_frame.assert_called_once_with(frame, FrameDirection.UPSTREAM) + class TestLLMSpecificMessageHandling(unittest.TestCase): """Tests that LLMSpecificMessage objects are correctly skipped in summarization.""" diff --git a/tests/test_llm_context_summarizer.py b/tests/test_llm_context_summarizer.py index 7555a8762..0439d403d 100644 --- a/tests/test_llm_context_summarizer.py +++ b/tests/test_llm_context_summarizer.py @@ -14,7 +14,10 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer +from pipecat.processors.aggregators.llm_context_summarizer import ( + LLMContextSummarizer, + SummaryAppliedEvent, +) from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig @@ -291,6 +294,252 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): await 
summarizer.cleanup() + async def test_summary_message_role_is_user(self): + """Test that the summary message uses the user role.""" + config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + # Add messages and trigger summarization + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNotNone(request_frame) + + # Simulate receiving a summary result + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="This is a test summary.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Find the summary message and verify its role is "user" + summary_msg = next( + (msg for msg in self.context.messages if "summary" in msg.get("content", "").lower()), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual(summary_msg["role"], "user") + + await summarizer.cleanup() + + async def test_summary_message_default_template(self): + """Test that the default summary_message_template is used.""" + config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await 
summarizer.process_frame(LLMFullResponseStartFrame()) + + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Key facts from conversation.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Default template wraps with "Conversation summary: {summary}" + summary_msg = next( + ( + msg + for msg in self.context.messages + if "Conversation summary:" in msg.get("content", "") + ), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual( + summary_msg["content"], "Conversation summary: Key facts from conversation." + ) + + await summarizer.cleanup() + + async def test_summary_message_custom_template(self): + """Test that a custom summary_message_template is applied.""" + config = LLMContextSummarizationConfig( + max_context_tokens=50, + min_messages_after_summary=2, + summary_message_template="\n{summary}\n", + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Key facts from conversation.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Custom template wraps with XML tags + summary_msg = next( + (msg for msg in self.context.messages if "" in msg.get("content", "")), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual( + summary_msg["content"], + "\nKey facts from conversation.\n", + ) + + await summarizer.cleanup() + + async def test_on_summary_applied_event(self): + """Test that on_summary_applied event fires with 
correct data.""" + config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + # Add messages (1 system + 10 user = 11 total) + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + applied_event = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event): + nonlocal applied_event + applied_event = event + + original_count = len(self.context.messages) # 11 + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Summarize up to index 7 (system=0, user1..user7), keep last 3 (user8, user9, user10) + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Test summary.", + last_summarized_index=7, + ) + await summarizer.process_frame(summary_result) + + # Allow async event handler to complete + await asyncio.sleep(0.05) + + # Verify event was fired + self.assertIsNotNone(applied_event) + self.assertIsInstance(applied_event, SummaryAppliedEvent) + self.assertEqual(applied_event.original_message_count, original_count) + + # After summarization: system + summary + 3 recent = 5 + self.assertEqual(applied_event.new_message_count, 5) + + # Summarized messages: indices 1-7 = 7 messages (excluding system at index 0) + self.assertEqual(applied_event.summarized_message_count, 7) + + # Preserved: system (1) + recent messages after index 7 (3) = 4 + self.assertEqual(applied_event.preserved_message_count, 4) + + await summarizer.cleanup() + + async def test_on_summary_applied_not_fired_on_error(self): + """Test that on_summary_applied event is NOT fired when summarization fails.""" + config = 
LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + applied_event = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event): + nonlocal applied_event + applied_event = event + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Send a result with an error + error_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="", + last_summarized_index=-1, + error="Summarization timed out", + ) + await summarizer.process_frame(error_result) + + await asyncio.sleep(0.05) + + # Event should NOT have fired + self.assertIsNone(applied_event) + + await summarizer.cleanup() + + async def test_request_frame_includes_timeout(self): + """Test that the request frame includes the configured summarization_timeout.""" + config = LLMContextSummarizationConfig( + max_context_tokens=50, + summarization_timeout=60.0, + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message to add tokens."}) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + self.assertIsNotNone(request_frame) + self.assertEqual(request_frame.summarization_timeout, 60.0) + + await summarizer.cleanup() + if __name__ == "__main__": 
unittest.main() From 82c249608ff9428e1b230828b1f61b2a66f90717 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 11:47:55 -0500 Subject: [PATCH 143/189] Move dedicated LLM summarization into LLMContextSummarizer The dedicated LLM logic lived in LLMAssistantAggregator, creating two code paths and requiring the aggregator to call a private LLMService method. Move it into the summarizer which already owns the config and summarization lifecycle, keeping the aggregator handler as a single-line upstream push. --- .../aggregators/llm_context_summarizer.py | 69 +++- .../aggregators/llm_response_universal.py | 68 +--- tests/test_context_summarization.py | 305 ++++++++---------- 3 files changed, 194 insertions(+), 248 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 85da9bfa0..44ec985bd 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -6,9 +6,10 @@ """This module defines a summarizer for managing LLM context summarization.""" +import asyncio import uuid from dataclasses import dataclass -from typing import Optional +from typing import TYPE_CHECKING, Optional from loguru import logger @@ -27,6 +28,9 @@ from pipecat.utils.context.llm_context_summarization import ( LLMContextSummarizationUtil, ) +if TYPE_CHECKING: + from pipecat.services.llm_service import LLMService + @dataclass class SummaryAppliedEvent: @@ -227,8 +231,10 @@ class LLMContextSummarizer(BaseObject): async def _request_summarization(self): """Request context summarization from LLM service. - Creates a summarization request frame and emits it via event handler. - Tracks the request ID to match async responses and prevent race conditions. 
+ Creates a summarization request frame and either handles it directly + using a dedicated LLM (if configured) or emits it via event handler + for the pipeline's primary LLM. Tracks the request ID to match async + responses and prevent race conditions. """ # Generate unique request ID request_id = str(uuid.uuid4()) @@ -250,8 +256,61 @@ class LLMContextSummarizer(BaseObject): summarization_timeout=self._config.summarization_timeout, ) - # Emit event for aggregator to broadcast - await self._call_event_handler("on_request_summarization", request_frame) + if self._config.llm: + # Use dedicated LLM directly — no need to involve the pipeline + self.task_manager.create_task( + self._generate_summary_with_dedicated_llm(self._config.llm, request_frame), + f"{self}-dedicated-llm-summary", + ) + else: + # Emit event for aggregator to broadcast to the pipeline LLM + await self._call_event_handler("on_request_summarization", request_frame) + + async def _generate_summary_with_dedicated_llm( + self, llm: "LLMService", frame: LLMContextSummaryRequestFrame + ): + """Generate summary using a dedicated LLM service. + + Calls the dedicated LLM's _generate_summary directly and feeds the + result back through _handle_summary_result, bypassing the pipeline. + + Args: + llm: The dedicated LLM service to use for summarization. + frame: The summarization request frame. 
+ """ + try: + if frame.summarization_timeout: + summary, last_index = await asyncio.wait_for( + llm._generate_summary(frame), + timeout=frame.summarization_timeout, + ) + else: + summary, last_index = await llm._generate_summary(frame) + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary=summary, + last_summarized_index=last_index, + ) + except asyncio.TimeoutError: + error = f"Context summarization timed out after {frame.summarization_timeout}s" + logger.error(f"{self}: {error}") + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=error, + ) + except Exception as e: + error = f"Error generating context summary: {e}" + logger.error(f"{self}: {error}") + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=error, + ) + + await self._handle_summary_result(result_frame) async def _handle_summary_result(self, frame: LLMContextSummaryResultFrame): """Handle context summarization result from LLM service. 
diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 361b2c8a6..b255748e0 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -16,7 +16,7 @@ import json import warnings from abc import abstractmethod from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set, Type +from typing import Any, Dict, List, Literal, Optional, Set, Type from loguru import logger @@ -39,7 +39,6 @@ from pipecat.frames.frames import ( LLMContextAssistantTimestampFrame, LLMContextFrame, LLMContextSummaryRequestFrame, - LLMContextSummaryResultFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMMessagesAppendFrame, @@ -84,9 +83,6 @@ from pipecat.utils.context.llm_context_summarization import LLMContextSummarizat from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text from pipecat.utils.time import time_now_iso8601 -if TYPE_CHECKING: - from pipecat.services.llm_service import LLMService - @dataclass class LLMUserAggregatorParams: @@ -1252,71 +1248,13 @@ class LLMAssistantAggregator(LLMContextAggregator): ): """Handle summarization request from the summarizer. - If a dedicated summarization LLM is configured, generates the summary - directly and feeds the result to the summarizer. Otherwise, pushes the - request frame upstream to the pipeline's primary LLM service. + Push the request frame UPSTREAM to the LLM service for processing. Args: summarizer: The summarizer that generated the request. frame: The summarization request frame to broadcast. 
""" - summarization_llm = ( - self._params.context_summarization_config.llm - if self._params.context_summarization_config - else None - ) - - if summarization_llm: - self.create_task(self._generate_summary_with_dedicated_llm(summarization_llm, frame)) - else: - await self.push_frame(frame, FrameDirection.UPSTREAM) - - async def _generate_summary_with_dedicated_llm( - self, llm: "LLMService", frame: LLMContextSummaryRequestFrame - ): - """Generate summary using a dedicated LLM service. - - Calls the dedicated LLM's _generate_summary directly and feeds the - result back to the summarizer, bypassing the pipeline. - - Args: - llm: The dedicated LLM service to use for summarization. - frame: The summarization request frame. - """ - try: - if frame.summarization_timeout: - summary, last_index = await asyncio.wait_for( - llm._generate_summary(frame), - timeout=frame.summarization_timeout, - ) - else: - summary, last_index = await llm._generate_summary(frame) - result_frame = LLMContextSummaryResultFrame( - request_id=frame.request_id, - summary=summary, - last_summarized_index=last_index, - ) - except asyncio.TimeoutError: - error = f"Context summarization timed out after {frame.summarization_timeout}s" - logger.error(f"{self}: {error}") - result_frame = LLMContextSummaryResultFrame( - request_id=frame.request_id, - summary="", - last_summarized_index=-1, - error=error, - ) - except Exception as e: - error = f"Error generating context summary: {e}" - await self.push_error(error_msg=error, exception=e) - result_frame = LLMContextSummaryResultFrame( - request_id=frame.request_id, - summary="", - last_summarized_index=-1, - error=error, - ) - - if self._summarizer: - await self._summarizer.process_frame(result_frame) + await self.push_frame(frame, FrameDirection.UPSTREAM) class LLMContextAggregatorPair: diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 446bfb8bd..ca56e7a32 100644 --- a/tests/test_context_summarization.py +++ 
b/tests/test_context_summarization.py @@ -654,85 +654,79 @@ class TestSummaryGenerationExceptions(unittest.IsolatedAsyncioTestCase): class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): - """Tests for dedicated LLM summarization in LLMAssistantAggregator.""" + """Tests for dedicated LLM summarization in LLMContextSummarizer.""" - def _create_context_and_frame(self): - """Create a context with enough messages and a matching request frame.""" - context = LLMContext() - context.add_message({"role": "user", "content": "Message 1"}) - context.add_message({"role": "assistant", "content": "Response 1"}) - context.add_message({"role": "user", "content": "Message 2"}) - - frame = LLMContextSummaryRequestFrame( - request_id="dedicated_test", - context=context, - min_messages_to_keep=1, - target_context_tokens=1000, - summarization_prompt="Summarize this", - summarization_timeout=5.0, - ) - return context, frame - - async def test_dedicated_llm_success(self): - """Test that dedicated LLM generates summary and feeds result to summarizer.""" - from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer - from pipecat.processors.aggregators.llm_response_universal import ( - LLMAssistantAggregator, - LLMAssistantAggregatorParams, - ) + async def asyncSetUp(self): from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams - context, frame = self._create_context_and_frame() + self.task_manager = TaskManager() + self.task_manager.setup(TaskManagerParams(loop=asyncio.get_running_loop())) - # Create a mock dedicated LLM - dedicated_llm = LLMService() - dedicated_llm._generate_summary = AsyncMock(return_value=("Dedicated summary", 1)) + def _create_context_and_config(self, dedicated_llm): + """Create a context with enough messages and a config with a dedicated LLM.""" + context = LLMContext() + for i in range(10): + context.add_message( + {"role": "user", "content": f"Test message {i} that adds tokens to context."} + ) 
config = LLMContextSummarizationConfig( - max_context_tokens=50, + max_context_tokens=50, # Very low to trigger easily llm=dedicated_llm, + summarization_timeout=5.0, ) - params = LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=config, - ) - aggregator = LLMAssistantAggregator(context, params=params) + return context, config - # Mock summarizer.process_frame to capture the result - result_frames = [] - original_process = aggregator._summarizer.process_frame + async def test_dedicated_llm_success(self): + """Test that dedicated LLM generates summary and applies result.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer - async def capture_process(frame): - result_frames.append(frame) - await original_process(frame) + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock(return_value=("Dedicated summary", 5)) - aggregator._summarizer.process_frame = capture_process + context, config = self._create_context_and_config(dedicated_llm) + original_message_count = len(context.messages) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) - # Call the method directly - await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + # Track whether on_request_summarization event fires (it should NOT) + event_fired = False + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal event_fired + event_fired = True + + # Trigger summarization via LLM response start + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Wait for the background task to complete + await asyncio.sleep(0.1) + + # The event should NOT have fired (dedicated LLM handles it internally) + self.assertFalse(event_fired) # Verify the dedicated LLM was called - 
dedicated_llm._generate_summary.assert_called_once_with(frame) + dedicated_llm._generate_summary.assert_called_once() - # Verify result was fed to the summarizer - self.assertEqual(len(result_frames), 1) - result = result_frames[0] - self.assertIsInstance(result, LLMContextSummaryResultFrame) - self.assertEqual(result.request_id, "dedicated_test") - self.assertEqual(result.summary, "Dedicated summary") - self.assertEqual(result.last_summarized_index, 1) - self.assertIsNone(result.error) + # Verify summary was applied to context (message count should decrease) + self.assertLess(len(context.messages), original_message_count) + + # Verify summary message is present + summary_messages = [ + msg for msg in context.messages if "Conversation summary:" in msg.get("content", "") + ] + self.assertEqual(len(summary_messages), 1) + self.assertIn("Dedicated summary", summary_messages[0]["content"]) + + await summarizer.cleanup() async def test_dedicated_llm_timeout(self): - """Test that dedicated LLM timeout produces error result.""" - from pipecat.processors.aggregators.llm_response_universal import ( - LLMAssistantAggregator, - LLMAssistantAggregatorParams, - ) + """Test that dedicated LLM timeout produces error and clears state.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer - context, _ = self._create_context_and_frame() - - # Create a mock dedicated LLM that hangs dedicated_llm = LLMService() async def slow_summary(frame): @@ -741,161 +735,116 @@ class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): dedicated_llm._generate_summary = slow_summary - config = LLMContextSummarizationConfig( - max_context_tokens=50, - llm=dedicated_llm, - ) - params = LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=config, - ) - aggregator = LLMAssistantAggregator(context, params=params) + context, config = self._create_context_and_config(dedicated_llm) + config.summarization_timeout 
= 0.1 # Very short timeout + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) - # Mock summarizer.process_frame to capture the result - result_frames = [] + original_message_count = len(context.messages) - async def capture_process(frame): - result_frames.append(frame) + # Trigger summarization + from pipecat.frames.frames import LLMFullResponseStartFrame - aggregator._summarizer.process_frame = capture_process + await summarizer.process_frame(LLMFullResponseStartFrame()) - # Create frame with very short timeout - frame = LLMContextSummaryRequestFrame( - request_id="timeout_test", - context=context, - min_messages_to_keep=1, - target_context_tokens=1000, - summarization_prompt="Summarize this", - summarization_timeout=0.1, - ) + # Wait for the background task to complete (timeout + some buffer) + await asyncio.sleep(0.3) - await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + # Context should be unchanged (timeout = error = no summary applied) + self.assertEqual(len(context.messages), original_message_count) - # Verify error result was fed to summarizer - self.assertEqual(len(result_frames), 1) - result = result_frames[0] - self.assertIsInstance(result, LLMContextSummaryResultFrame) - self.assertEqual(result.request_id, "timeout_test") - self.assertEqual(result.summary, "") - self.assertEqual(result.last_summarized_index, -1) - self.assertIn("timed out", result.error) + # Summarization state should be cleared so new requests can be made + self.assertFalse(summarizer._summarization_in_progress) + + await summarizer.cleanup() async def test_dedicated_llm_exception(self): - """Test that dedicated LLM exceptions produce error result.""" - from pipecat.processors.aggregators.llm_response_universal import ( - LLMAssistantAggregator, - LLMAssistantAggregatorParams, - ) + """Test that dedicated LLM exceptions produce error and clear state.""" + from 
pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer - context, frame = self._create_context_and_frame() - - # Create a mock dedicated LLM that raises dedicated_llm = LLMService() dedicated_llm._generate_summary = AsyncMock( side_effect=RuntimeError("LLM connection failed") ) - config = LLMContextSummarizationConfig( - max_context_tokens=50, - llm=dedicated_llm, - ) - params = LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=config, - ) - aggregator = LLMAssistantAggregator(context, params=params) - aggregator.push_error = AsyncMock() + context, config = self._create_context_and_config(dedicated_llm) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) - # Mock summarizer.process_frame to capture the result - result_frames = [] + original_message_count = len(context.messages) - async def capture_process(frame): - result_frames.append(frame) + # Trigger summarization + from pipecat.frames.frames import LLMFullResponseStartFrame - aggregator._summarizer.process_frame = capture_process + await summarizer.process_frame(LLMFullResponseStartFrame()) - await aggregator._generate_summary_with_dedicated_llm(dedicated_llm, frame) + # Wait for the background task to complete + await asyncio.sleep(0.1) - # Verify error result was fed to summarizer - self.assertEqual(len(result_frames), 1) - result = result_frames[0] - self.assertIsInstance(result, LLMContextSummaryResultFrame) - self.assertEqual(result.request_id, "dedicated_test") - self.assertEqual(result.summary, "") - self.assertEqual(result.last_summarized_index, -1) - self.assertIn("LLM connection failed", result.error) + # Context should be unchanged (exception = error = no summary applied) + self.assertEqual(len(context.messages), original_message_count) - # push_error should have been called - aggregator.push_error.assert_called_once() + # Summarization state should be cleared + 
self.assertFalse(summarizer._summarization_in_progress) - async def test_on_request_summarization_routes_to_dedicated_llm(self): - """Test that _on_request_summarization routes to dedicated LLM when configured.""" - from pipecat.processors.aggregators.llm_response_universal import ( - LLMAssistantAggregator, - LLMAssistantAggregatorParams, - ) + await summarizer.cleanup() - context, frame = self._create_context_and_frame() + async def test_dedicated_llm_does_not_emit_event(self): + """Test that summarizer does NOT emit on_request_summarization when dedicated LLM is set.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer dedicated_llm = LLMService() dedicated_llm._generate_summary = AsyncMock(return_value=("Summary", 1)) - config = LLMContextSummarizationConfig( - max_context_tokens=50, - llm=dedicated_llm, - ) - params = LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=config, - ) - aggregator = LLMAssistantAggregator(context, params=params) - aggregator.push_frame = AsyncMock() + context, config = self._create_context_and_config(dedicated_llm) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) - # Track what coroutine is passed to create_task - created_coros = [] - original_create_task = aggregator.create_task + event_fired = False - def mock_create_task(coro, *args, **kwargs): - created_coros.append(coro) - # Actually run the coroutine to avoid "never awaited" warning - task = asyncio.ensure_future(coro) - return task + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal event_fired + event_fired = True - aggregator.create_task = mock_create_task + from pipecat.frames.frames import LLMFullResponseStartFrame - await aggregator._on_request_summarization(aggregator._summarizer, frame) + await summarizer.process_frame(LLMFullResponseStartFrame()) + 
await asyncio.sleep(0.1) - # Should NOT push frame upstream - aggregator.push_frame.assert_not_called() + self.assertFalse(event_fired) - # Should have created a task for the dedicated LLM - self.assertEqual(len(created_coros), 1) + await summarizer.cleanup() - # Wait for the task to complete - await asyncio.sleep(0.05) + async def test_no_dedicated_llm_emits_event(self): + """Test that summarizer emits on_request_summarization when no dedicated LLM.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer - async def test_on_request_summarization_pushes_upstream_without_dedicated_llm(self): - """Test that _on_request_summarization pushes upstream when no dedicated LLM.""" - from pipecat.processors.aggregators.llm_response_universal import ( - LLMAssistantAggregator, - LLMAssistantAggregatorParams, - ) - from pipecat.processors.frame_processor import FrameDirection - - context, frame = self._create_context_and_frame() + context = LLMContext() + for i in range(10): + context.add_message( + {"role": "user", "content": f"Test message {i} that adds tokens to context."} + ) config = LLMContextSummarizationConfig(max_context_tokens=50) - params = LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=config, - ) - aggregator = LLMAssistantAggregator(context, params=params) - aggregator.push_frame = AsyncMock() + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) - await aggregator._on_request_summarization(aggregator._summarizer, frame) + request_frame = None - # Should push frame upstream - aggregator.push_frame.assert_called_once_with(frame, FrameDirection.UPSTREAM) + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + from pipecat.frames.frames import LLMFullResponseStartFrame + + await 
summarizer.process_frame(LLMFullResponseStartFrame()) + + self.assertIsNotNone(request_frame) + self.assertIsInstance(request_frame, LLMContextSummaryRequestFrame) + + await summarizer.cleanup() class TestLLMSpecificMessageHandling(unittest.TestCase): From f74af9b9c7e26cef7db3062c788782ad99140fc6 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 11:57:22 -0500 Subject: [PATCH 144/189] Always apply a timeout to summarization LLM calls Even when summarization_timeout is explicitly set to None, use a DEFAULT_SUMMARIZATION_TIMEOUT (120s) fallback so the LLM call can never hang indefinitely. Applied in both LLMService and the dedicated LLM path in LLMContextSummarizer. --- src/pipecat/frames/frames.py | 2 +- .../aggregators/llm_context_summarizer.py | 16 ++++++++-------- src/pipecat/services/llm_service.py | 18 ++++++++---------- .../utils/context/llm_context_summarization.py | 6 ++++-- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index fbb0294a3..e1d2c37ff 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -2020,7 +2020,7 @@ class LLMContextSummaryRequestFrame(ControlFrame): summarization_prompt: System prompt instructing the LLM how to generate the summary. summarization_timeout: Maximum time in seconds for the LLM to generate a - summary. None means no timeout. + summary. When None, a default timeout of 120s is applied. 
""" request_id: str diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 44ec985bd..bfdbbceb0 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -24,6 +24,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMe from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject from pipecat.utils.context.llm_context_summarization import ( + DEFAULT_SUMMARIZATION_TIMEOUT, LLMContextSummarizationConfig, LLMContextSummarizationUtil, ) @@ -278,21 +279,20 @@ class LLMContextSummarizer(BaseObject): llm: The dedicated LLM service to use for summarization. frame: The summarization request frame. """ + timeout = frame.summarization_timeout or DEFAULT_SUMMARIZATION_TIMEOUT + try: - if frame.summarization_timeout: - summary, last_index = await asyncio.wait_for( - llm._generate_summary(frame), - timeout=frame.summarization_timeout, - ) - else: - summary, last_index = await llm._generate_summary(frame) + summary, last_index = await asyncio.wait_for( + llm._generate_summary(frame), + timeout=timeout, + ) result_frame = LLMContextSummaryResultFrame( request_id=frame.request_id, summary=summary, last_summarized_index=last_index, ) except asyncio.TimeoutError: - error = f"Context summarization timed out after {frame.summarization_timeout}s" + error = f"Context summarization timed out after {timeout}s" logger.error(f"{self}: {error}") result_frame = LLMContextSummaryResultFrame( request_id=frame.request_id, diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 86048ccbe..da0d57d66 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -62,6 +62,7 @@ from pipecat.services.ai_service import AIService from pipecat.services.settings import LLMSettings from 
pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( + DEFAULT_SUMMARIZATION_TIMEOUT, LLMContextSummarizationUtil, ) @@ -436,18 +437,15 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): last_index = -1 error = None + timeout = frame.summarization_timeout or DEFAULT_SUMMARIZATION_TIMEOUT + try: - if frame.summarization_timeout: - summary, last_index = await asyncio.wait_for( - self._generate_summary(frame), - timeout=frame.summarization_timeout, - ) - else: - summary, last_index = await self._generate_summary(frame) - except asyncio.TimeoutError: - await self.push_error( - error_msg=f"Context summarization timed out after {frame.summarization_timeout}s" + summary, last_index = await asyncio.wait_for( + self._generate_summary(frame), + timeout=timeout, ) + except asyncio.TimeoutError: + await self.push_error(error_msg=f"Context summarization timed out after {timeout}s") except Exception as e: error = f"Error generating context summary: {e}" await self.push_error(error, exception=e) diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 00dd74fd8..0bdebb3a2 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -20,6 +20,9 @@ from loguru import logger from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage +# Fallback timeout (seconds) used when summarization_timeout is None. +DEFAULT_SUMMARIZATION_TIMEOUT = 120.0 + # Token estimation constants CHARS_PER_TOKEN = 4 # Industry-standard heuristic: 1 token ≈ 4 characters TOKEN_OVERHEAD_PER_MESSAGE = 10 # Estimated structural overhead per message @@ -89,7 +92,6 @@ class LLMContextSummarizationConfig: summarization_timeout: Maximum time in seconds to wait for the LLM to generate a summary. 
If the call exceeds this timeout, summarization is aborted with an error and future summarizations are unblocked. - Set to None to disable the timeout. """ max_context_tokens: int = 8000 @@ -99,7 +101,7 @@ class LLMContextSummarizationConfig: summarization_prompt: Optional[str] = None summary_message_template: str = "Conversation summary: {summary}" llm: Optional["LLMService"] = None - summarization_timeout: Optional[float] = 120.0 + summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT def __post_init__(self): """Validate configuration parameters.""" From 6ebfea474614819f5d5e9434f30ff511e20a4518 Mon Sep 17 00:00:00 2001 From: Matt <1610241+wollerman@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:44:31 -0500 Subject: [PATCH 145/189] update numba version pin to >= --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2dea9005c..2cf46c3cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "soxr~=0.5.0", "openai>=1.74.0,<3", # Pinning numba to resolve package dependencies - "numba==0.61.2", + "numba>=0.61.2", "wait_for2>=0.4.1; python_version<'3.12'", # Required by LocalSmartTurnAnalyzerV3 # Inlined here instead of using a self-referential extra for Poetry compatibility. 
From acff172bf27a1341348c6acebcfaa21843375a6c Mon Sep 17 00:00:00 2001 From: Matt <1610241+wollerman@users.noreply.github.com> Date: Fri, 27 Feb 2026 14:52:37 -0500 Subject: [PATCH 146/189] create changelog entry --- changelog/3868.changed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3868.changed.md diff --git a/changelog/3868.changed.md b/changelog/3868.changed.md new file mode 100644 index 000000000..4f019cca2 --- /dev/null +++ b/changelog/3868.changed.md @@ -0,0 +1 @@ +- Updated numba version pin from == to >=0.61.2 From 6f33aff0c6c1396b43e63b63c7764afac4dbbdff Mon Sep 17 00:00:00 2001 From: Rupesh Date: Fri, 27 Feb 2026 13:29:01 -0800 Subject: [PATCH 147/189] Fix PipelineTask double-inserting RTVIProcessor when custom RTVIObserver is provided When the user places an RTVIProcessor inside their pipeline and provides a custom RTVIObserver subclass in observers, PipelineTask correctly detects both and logs "skipping default ones." However it then unconditionally prepends self._rtvi to the pipeline, causing the processor to appear twice in the frame chain. Track whether the RTVIProcessor was found externally (inside the user pipeline) vs created internally. Only prepend it when created internally. Fixes #3867 --- changelog/3867.fixed.md | 1 + src/pipecat/pipeline/task.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 changelog/3867.fixed.md diff --git a/changelog/3867.fixed.md b/changelog/3867.fixed.md new file mode 100644 index 000000000..41ee584a2 --- /dev/null +++ b/changelog/3867.fixed.md @@ -0,0 +1 @@ +- Fixed `PipelineTask` double-inserting `RTVIProcessor` into the frame chain when the user provides both an `RTVIProcessor` in the pipeline and a custom `RTVIObserver` subclass in observers. 
diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 2cfe26606..1db23e7d4 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -330,6 +330,7 @@ class PipelineTask(BasePipelineTask): # RTVI support self._rtvi = None + self._rtvi_external = False external_rtvi = self._find_processor(pipeline, RTVIProcessor) external_observer_found = any(isinstance(o, RTVIObserver) for o in observers) @@ -349,6 +350,7 @@ class PipelineTask(BasePipelineTask): "They are both added by default, no need to add them yourself." ) self._rtvi = external_rtvi + self._rtvi_external = True elif enable_rtvi: self._rtvi = rtvi_processor or RTVIProcessor() observers.append(self._rtvi.create_rtvi_observer(params=rtvi_observer_params)) @@ -388,7 +390,13 @@ class PipelineTask(BasePipelineTask): # allows us to receive and react to downstream frames. source = PipelineSource(self._source_push_frame, name=f"{self}::Source") sink = PipelineSink(self._sink_push_frame, name=f"{self}::Sink") - processors = [self._rtvi, pipeline] if self._rtvi else [pipeline] + # Only prepend the RTVIProcessor if we created it ourselves. When the + # user already placed it inside their pipeline we must not insert it + # again or it will appear twice in the frame chain. + if self._rtvi and not self._rtvi_external: + processors = [self._rtvi, pipeline] + else: + processors = [pipeline] self._pipeline = Pipeline(processors, source=source, sink=sink) # The task observer acts as a proxy to the provided observers. This way, From 51a3310e78f63a2ade4d99959b00994b3136f44a Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:39:57 -0300 Subject: [PATCH 148/189] Added LLMSummarizeContextFrame: push this frame anywhere in the pipeline to trigger on-demand context summarization (e.g. from a function call tool). 
--- src/pipecat/frames/frames.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index e1d2c37ff..126f3c001 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -43,6 +43,7 @@ if TYPE_CHECKING: from pipecat.processors.aggregators.llm_context import LLMContext, NotGiven from pipecat.processors.frame_processor import FrameProcessor from pipecat.services.settings import ServiceSettings + from pipecat.utils.context.llm_context_summarization import LLMContextSummaryConfig from pipecat.utils.tracing.tracing_context import TracingContext @@ -2000,6 +2001,22 @@ class LLMAssistantPushAggregationFrame(ControlFrame): """ +@dataclass +class LLMSummarizeContextFrame(ControlFrame): + """Frame requesting on-demand context summarization. + + Push this frame into the pipeline to trigger a manual context summarization. + + Parameters: + config: Optional per-request override for summary generation settings + (prompt, token budget, messages to keep). If ``None``, the + summarizer's default :class:`~pipecat.utils.context.llm_context_summarization.LLMContextSummaryConfig` + is used. + """ + + config: Optional["LLMContextSummaryConfig"] = None + + @dataclass class LLMContextSummaryRequestFrame(ControlFrame): """Frame requesting context summarization from an LLM service. From f11d4b694415c625f3c651eb907e6d8afd65df2c Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:40:41 -0300 Subject: [PATCH 149/189] Refactored LLMContextSummarizationConfig into two focused classes, LLMContextSummaryConfig and LLMAutoContextSummarizationConfig. 
--- .../context/llm_context_summarization.py | 132 ++++++++++++++++-- 1 file changed, 119 insertions(+), 13 deletions(-) diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 0bdebb3a2..e68311942 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -10,7 +10,8 @@ This module provides reusable functionality for automatically compressing conver context when token limits are reached, enabling efficient long-running conversations. """ -from dataclasses import dataclass +import warnings +from dataclasses import dataclass, field from typing import TYPE_CHECKING, List, Optional if TYPE_CHECKING: @@ -54,26 +55,18 @@ The conversation transcript follows. Generate only the summary, no other text."" @dataclass -class LLMContextSummarizationConfig: - """Configuration for context summarization behavior. +class LLMContextSummaryConfig: + """Configuration for summary generation parameters. - Controls when and how conversation context is automatically compressed - to manage token limits in long-running conversations. + Contains settings that control how a summary is generated. Used by both + automatic and manual summarization modes. Parameters: - max_context_tokens: Maximum allowed context size in tokens. When this - limit is reached, summarization is triggered to compress the context. - The tokens are calculated using the industry-standard approximation - of 1 token ≈ 4 characters. target_context_tokens: Maximum token size for the generated summary. This value is passed directly to the LLM as the max_tokens parameter when generating the summary. Should be sized appropriately to allow the summary plus recent preserved messages to fit within reasonable context limits. - max_unsummarized_messages: Maximum number of new messages that can - accumulate since the last summary before triggering a new - summarization. 
This ensures regular compression even if token - limits are not reached. min_messages_after_summary: Number of recent messages to preserve uncompressed after each summarization. These messages maintain immediate conversational context. @@ -94,6 +87,94 @@ class LLMContextSummarizationConfig: is aborted with an error and future summarizations are unblocked. """ + target_context_tokens: int = 6000 + min_messages_after_summary: int = 4 + summarization_prompt: Optional[str] = None + summary_message_template: str = "Conversation summary: {summary}" + llm: Optional["LLMService"] = None + summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT + + def __post_init__(self): + """Validate configuration parameters.""" + if self.target_context_tokens <= 0: + raise ValueError("target_context_tokens must be positive") + if self.min_messages_after_summary < 0: + raise ValueError("min_messages_after_summary must be non-negative") + + @property + def summary_prompt(self) -> str: + """Get the summarization prompt to use. + + Returns: + The custom prompt if set, otherwise the default summarization prompt. + """ + return self.summarization_prompt or DEFAULT_SUMMARIZATION_PROMPT + + +@dataclass +class LLMAutoContextSummarizationConfig: + """Configuration for automatic context summarization. + + Controls when conversation context is automatically compressed and how + that summary is generated. Summarization is triggered when either the + token limit or the unsummarized message count threshold is exceeded. + + Parameters: + max_context_tokens: Maximum allowed context size in tokens. When this + limit is reached, summarization is triggered to compress the context. + The tokens are calculated using the industry-standard approximation + of 1 token ≈ 4 characters. + max_unsummarized_messages: Maximum number of new messages that can + accumulate since the last summary before triggering a new + summarization. This ensures regular compression even if token + limits are not reached. 
+ summary_config: Configuration for summary generation parameters + (prompt, token budget, messages to keep). If not provided, uses + default ``LLMContextSummaryConfig`` values. + """ + + max_context_tokens: int = 8000 + max_unsummarized_messages: int = 20 + summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig) + + def __post_init__(self): + """Validate configuration parameters.""" + if self.max_context_tokens <= 0: + raise ValueError("max_context_tokens must be positive") + if self.max_unsummarized_messages < 1: + raise ValueError("max_unsummarized_messages must be at least 1") + + # Auto-adjust target_context_tokens if it exceeds max_context_tokens + if self.summary_config.target_context_tokens > self.max_context_tokens: + # Use 80% of max_context_tokens as a reasonable default + self.summary_config.target_context_tokens = int(self.max_context_tokens * 0.8) + + +@dataclass +class LLMContextSummarizationConfig: + """Configuration for context summarization behavior. + + .. deprecated:: + Use :class:`LLMAutoContextSummarizationConfig` with a nested + :class:`LLMContextSummaryConfig` instead:: + + LLMAutoContextSummarizationConfig( + max_context_tokens=8000, + max_unsummarized_messages=20, + summary_config=LLMContextSummaryConfig( + target_context_tokens=6000, + min_messages_after_summary=4, + ), + ) + + Parameters: + max_context_tokens: Maximum allowed context size in tokens. + target_context_tokens: Maximum token size for the generated summary. + max_unsummarized_messages: Maximum new messages before triggering summarization. + min_messages_after_summary: Number of recent messages to preserve. + summarization_prompt: Custom prompt for summary generation. 
+ """ + max_context_tokens: int = 8000 target_context_tokens: int = 6000 max_unsummarized_messages: int = 20 @@ -105,6 +186,12 @@ class LLMContextSummarizationConfig: def __post_init__(self): """Validate configuration parameters.""" + warnings.warn( + "LLMContextSummarizationConfig is deprecated. " + "Use LLMAutoContextSummarizationConfig with a nested LLMContextSummaryConfig instead.", + DeprecationWarning, + stacklevel=2, + ) if self.max_context_tokens <= 0: raise ValueError("max_context_tokens must be positive") if self.target_context_tokens <= 0: @@ -129,6 +216,25 @@ class LLMContextSummarizationConfig: """ return self.summarization_prompt or DEFAULT_SUMMARIZATION_PROMPT + def to_auto_config(self) -> LLMAutoContextSummarizationConfig: + """Convert to the new :class:`LLMAutoContextSummarizationConfig`. + + Returns: + An equivalent ``LLMAutoContextSummarizationConfig`` instance. + """ + return LLMAutoContextSummarizationConfig( + max_context_tokens=self.max_context_tokens, + max_unsummarized_messages=self.max_unsummarized_messages, + summary_config=LLMContextSummaryConfig( + target_context_tokens=self.target_context_tokens, + min_messages_after_summary=self.min_messages_after_summary, + summarization_prompt=self.summarization_prompt, + summary_message_template=self.summary_message_template, + llm=self.llm, + summarization_timeout=self.summarization_timeout, + ), + ) + @dataclass class LLMMessagesToSummarize: From 08d93ce9b662fef96faf64007939741cecc9ebd6 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:41:17 -0300 Subject: [PATCH 150/189] Renamed LLMAssistantAggregatorParams fields for clarity. 
--- .../aggregators/llm_response_universal.py | 77 ++++++++++++++----- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index b255748e0..c43cc279d 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -79,7 +79,10 @@ from pipecat.turns.user_stop import BaseUserTurnStopStrategy, UserTurnStoppedPar from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig from pipecat.turns.user_turn_controller import UserTurnController from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies, UserTurnStrategies -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummarizationConfig, +) from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text from pipecat.utils.time import time_now_iso8601 @@ -125,18 +128,54 @@ class LLMAssistantAggregatorParams: in text frames by adding spaces between tokens. This parameter is ignored when used with the newer LLMAssistantAggregator, which handles word spacing automatically. - enable_context_summarization: Enable automatic context summarization when token - limits are reached (disabled by default). When enabled, older conversation - messages are automatically compressed into summaries to manage context size. - context_summarization_config: Configuration for context summarization behavior. - Controls thresholds, message preservation, and summarization prompts. If None - and summarization is enabled, uses default configuration values. + enable_auto_context_summarization: Enable automatic context summarization when token + or message-count limits are reached (disabled by default). 
When enabled, + older conversation messages are automatically compressed into summaries to + manage context size. + auto_context_summarization_config: Configuration for automatic context + summarization. Controls trigger thresholds, message preservation, and + summarization prompts. If None, uses default + ``LLMAutoContextSummarizationConfig`` values. """ expect_stripped_words: bool = True - enable_context_summarization: bool = False + enable_auto_context_summarization: bool = False + auto_context_summarization_config: Optional[LLMAutoContextSummarizationConfig] = None + + # --------------------------------------------------------------------------- + # Deprecated field names — kept for backward compatibility. + # Use enable_auto_context_summarization and auto_context_summarization_config instead. + # --------------------------------------------------------------------------- + enable_context_summarization: Optional[bool] = None context_summarization_config: Optional[LLMContextSummarizationConfig] = None + def __post_init__(self): + if self.enable_context_summarization is not None: + warnings.warn( + "LLMAssistantAggregatorParams.enable_context_summarization is deprecated. " + "Use enable_auto_context_summarization instead.", + DeprecationWarning, + stacklevel=2, + ) + self.enable_auto_context_summarization = self.enable_context_summarization + self.enable_context_summarization = None + + if self.context_summarization_config is not None: + warnings.warn( + "LLMAssistantAggregatorParams.context_summarization_config is deprecated. 
" + "Use auto_context_summarization_config (LLMAutoContextSummarizationConfig) instead.", + DeprecationWarning, + stacklevel=2, + ) + if isinstance(self.context_summarization_config, LLMContextSummarizationConfig): + self.auto_context_summarization_config = ( + self.context_summarization_config.to_auto_config() + ) + else: + # Accept LLMAutoContextSummarizationConfig passed to the deprecated field + self.auto_context_summarization_config = self.context_summarization_config # type: ignore[assignment] + self.context_summarization_config = None + @dataclass class UserTurnStoppedMessage: @@ -825,16 +864,18 @@ class LLMAssistantAggregator(LLMContextAggregator): self._thought_aggregation: List[TextPartForConcatenation] = [] self._thought_start_time: str = "" - # Context summarization - self._summarizer: Optional[LLMContextSummarizer] = None - if self._params.enable_context_summarization: - self._summarizer = LLMContextSummarizer( - context=self._context, - config=self._params.context_summarization_config, - ) - self._summarizer.add_event_handler( - "on_request_summarization", self._on_request_summarization - ) + # Context summarization — always create the summarizer so that manually + # pushed LLMSummarizeContextFrame frames are always handled. + # Auto-triggering based on thresholds is only enabled when + # enable_auto_context_summarization is True. 
+ self._summarizer: Optional[LLMContextSummarizer] = LLMContextSummarizer( + context=self._context, + config=self._params.auto_context_summarization_config, + auto_trigger=self._params.enable_auto_context_summarization, + ) + self._summarizer.add_event_handler( + "on_request_summarization", self._on_request_summarization + ) self._register_event_handler("on_assistant_turn_started") self._register_event_handler("on_assistant_turn_stopped") From ed7f0a2c08b229996c6d8722591233b9be61e1af Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:41:55 -0300 Subject: [PATCH 151/189] Adding support for on-demand summarization --- .../aggregators/llm_context_summarizer.py | 107 +++++++++++++----- 1 file changed, 78 insertions(+), 29 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index bfdbbceb0..54879a8bb 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -19,14 +19,16 @@ from pipecat.frames.frames import ( LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame, LLMFullResponseStartFrame, + LLMSummarizeContextFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject from pipecat.utils.context.llm_context_summarization import ( DEFAULT_SUMMARIZATION_TIMEOUT, - LLMContextSummarizationConfig, + LLMAutoContextSummarizationConfig, LLMContextSummarizationUtil, + LLMContextSummaryConfig, ) if TYPE_CHECKING: @@ -55,9 +57,20 @@ class SummaryAppliedEvent: class LLMContextSummarizer(BaseObject): """Summarizer for managing LLM context summarization. - This class manages automatic context summarization when token or message - limits are reached. 
It monitors the LLM context size, triggers - summarization requests, and applies the results to compress conversation history. + This class manages context summarization, either automatically when token or + message limits are reached, or on-demand when an ``LLMSummarizeContextFrame`` + is received. It monitors the LLM context size, triggers summarization requests, + and applies the results to compress conversation history. + + When ``auto_trigger=True`` (the default), summarization is triggered + automatically based on the configured thresholds in + ``LLMAutoContextSummarizationConfig``. When ``auto_trigger=False``, + threshold checks are skipped and summarization only happens when an + ``LLMSummarizeContextFrame`` is explicitly pushed into the pipeline. + + Both modes can coexist: set ``auto_trigger=True`` and also push + ``LLMSummarizeContextFrame`` at any time to force an immediate summarization + (subject to the ``_summarization_in_progress`` guard). Event handlers available: @@ -88,18 +101,26 @@ class LLMContextSummarizer(BaseObject): self, *, context: LLMContext, - config: Optional[LLMContextSummarizationConfig] = None, + config: Optional[LLMAutoContextSummarizationConfig] = None, + auto_trigger: bool = True, ): """Initialize the context summarizer. Args: context: The LLM context to monitor and summarize. - config: Configuration for summarization behavior. If None, uses default config. + config: Auto-summarization configuration controlling both trigger + thresholds and default summary generation parameters. If None, + uses default ``LLMAutoContextSummarizationConfig`` values. + auto_trigger: Whether to automatically trigger summarization when + thresholds are reached. When False, summarization only happens + when an ``LLMSummarizeContextFrame`` is pushed into the pipeline. + Defaults to True. 
""" super().__init__() self._context = context - self._config = config or LLMContextSummarizationConfig() + self._auto_config = config or LLMAutoContextSummarizationConfig() + self._auto_trigger = auto_trigger self._task_manager: Optional[BaseTaskManager] = None @@ -137,6 +158,8 @@ class LLMContextSummarizer(BaseObject): """ if isinstance(frame, LLMFullResponseStartFrame): await self._handle_llm_response_start(frame) + elif isinstance(frame, LLMSummarizeContextFrame): + await self._handle_manual_summarization_request(frame) elif isinstance(frame, LLMContextSummaryResultFrame): await self._handle_summary_result(frame) elif isinstance(frame, InterruptionFrame): @@ -151,12 +174,24 @@ class LLMContextSummarizer(BaseObject): if self._should_summarize(): await self._request_summarization() - async def _handle_interruption(self): - """Handle interruption by canceling summarization in progress. + async def _handle_manual_summarization_request(self, frame: LLMSummarizeContextFrame): + """Handle an explicit on-demand summarization request. + + Reuses the same ``_request_summarization()`` code path as auto mode, + so bookkeeping (``_summarization_in_progress``, + ``_pending_summary_request_id``) is always updated correctly. Args: - frame: The interruption frame. + frame: The manual summarization request frame, optionally carrying + a per-request :class:`~pipecat.utils.context.llm_context_summarization.LLMContextSummaryConfig`. """ + if self._summarization_in_progress: + logger.debug(f"{self}: Summarization already in progress, ignoring manual request") + return + await self._request_summarization(config_override=frame.config) + + async def _handle_interruption(self): + """Handle interruption by canceling summarization in progress.""" # Reset summarization state to allow new requests. This is necessary because # the request frame (LLMContextSummaryRequestFrame) may have been cancelled # during interruption. 
We preserve _pending_summary_request_id to handle the @@ -179,13 +214,17 @@ class LLMContextSummarizer(BaseObject): Returns: True if all conditions are met: + - ``auto_trigger`` is enabled - No summarization currently in progress - AND either: - - Token count exceeds max_context_tokens - - OR message count exceeds max_unsummarized_messages since last summary + - Token count exceeds ``max_context_tokens`` + - OR message count exceeds ``max_unsummarized_messages`` since last summary """ logger.trace(f"{self}: Checking if context summarization is needed") + if not self._auto_trigger: + return False + if self._summarization_in_progress: logger.debug(f"{self}: Summarization already in progress") return False @@ -195,20 +234,20 @@ class LLMContextSummarizer(BaseObject): num_messages = len(self._context.messages) # Check if we've reached the token limit - token_limit = self._config.max_context_tokens + token_limit = self._auto_config.max_context_tokens token_limit_exceeded = total_tokens >= token_limit # Check if we've exceeded max unsummarized messages messages_since_summary = len(self._context.messages) - 1 message_threshold_exceeded = ( - messages_since_summary >= self._config.max_unsummarized_messages + messages_since_summary >= self._auto_config.max_unsummarized_messages ) logger.trace( f"{self}: Context has {num_messages} messages, " f"~{total_tokens} tokens (limit: {token_limit}), " f"{messages_since_summary} messages since last summary " - f"(message threshold: {self._config.max_unsummarized_messages})" + f"(message threshold: {self._auto_config.max_unsummarized_messages})" ) # Trigger if either limit is exceeded @@ -223,23 +262,30 @@ class LLMContextSummarizer(BaseObject): reason.append(f"~{total_tokens} tokens (>={token_limit} limit)") if message_threshold_exceeded: reason.append( - f"{messages_since_summary} messages (>={self._config.max_unsummarized_messages} threshold)" + f"{messages_since_summary} messages (>={self._auto_config.max_unsummarized_messages} 
threshold)" ) logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}") return True - async def _request_summarization(self): + async def _request_summarization( + self, config_override: Optional[LLMContextSummaryConfig] = None + ): """Request context summarization from LLM service. Creates a summarization request frame and either handles it directly using a dedicated LLM (if configured) or emits it via event handler - for the pipeline's primary LLM. Tracks the request ID to match async - responses and prevent race conditions. + for the pipeline's primary LLM. + Tracks the request ID to match async responses and prevent race conditions. + + Args: + config_override: Optional per-request summary configuration. If provided, + overrides the default summary generation settings from + ``self._auto_config.summary_config``. """ # Generate unique request ID request_id = str(uuid.uuid4()) - min_keep = self._config.min_messages_after_summary + summary_config = config_override or self._auto_config.summary_config # Mark summarization in progress self._summarization_in_progress = True @@ -251,16 +297,16 @@ class LLMContextSummarizer(BaseObject): request_frame = LLMContextSummaryRequestFrame( request_id=request_id, context=self._context, - min_messages_to_keep=min_keep, - target_context_tokens=self._config.target_context_tokens, - summarization_prompt=self._config.summary_prompt, - summarization_timeout=self._config.summarization_timeout, + min_messages_to_keep=summary_config.min_messages_after_summary, + target_context_tokens=summary_config.target_context_tokens, + summarization_prompt=summary_config.summary_prompt, + summarization_timeout=summary_config.summarization_timeout, ) - if self._config.llm: + if summary_config.llm: # Use dedicated LLM directly — no need to involve the pipeline self.task_manager.create_task( - self._generate_summary_with_dedicated_llm(self._config.llm, request_frame), + self._generate_summary_with_dedicated_llm(summary_config.llm, 
request_frame), f"{self}-dedicated-llm-summary", ) else: @@ -323,7 +369,9 @@ class LLMContextSummarizer(BaseObject): """ logger.debug(f"{self}: Received summary result (request_id={frame.request_id})") - # Check if this is the result we're waiting for + # Check if this is the result we're waiting for. Both auto and manual + # summarization set _pending_summary_request_id via _request_summarization(), + # so this check always applies. if frame.request_id != self._pending_summary_request_id: logger.debug(f"{self}: Ignoring stale summary result (request_id={frame.request_id})") return @@ -360,7 +408,7 @@ class LLMContextSummarizer(BaseObject): if last_summarized_index >= len(self._context.messages): return False - min_keep = self._config.min_messages_after_summary + min_keep = self._auto_config.summary_config.min_messages_after_summary remaining = len(self._context.messages) - 1 - last_summarized_index if remaining < min_keep: return False @@ -377,6 +425,7 @@ class LLMContextSummarizer(BaseObject): summary: The generated summary text. last_summarized_index: Index of the last message that was summarized. """ + config = self._auto_config.summary_config messages = self._context.messages # Find the first system message to preserve. LLMSpecificMessage instances are excluded @@ -397,7 +446,7 @@ class LLMContextSummarizer(BaseObject): # Create summary message as a user message (the summary is context # provided *to* the assistant, not something the assistant said) - summary_content = self._config.summary_message_template.format(summary=summary) + summary_content = config.summary_message_template.format(summary=summary) summary_message = {"role": "user", "content": summary_content} # Reconstruct context From dfd0a515f320ae47091713c4a9a071c91fa7eafd Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:42:13 -0300 Subject: [PATCH 152/189] Changelog entries for the context summarization improvements. 
--- changelog/3863.added.2.md | 1 + changelog/3863.added.md | 1 + changelog/3863.changed.md | 1 + changelog/3863.deprecated.md | 1 + 4 files changed, 4 insertions(+) create mode 100644 changelog/3863.added.2.md create mode 100644 changelog/3863.added.md create mode 100644 changelog/3863.changed.md create mode 100644 changelog/3863.deprecated.md diff --git a/changelog/3863.added.2.md b/changelog/3863.added.2.md new file mode 100644 index 000000000..9c0ab90ba --- /dev/null +++ b/changelog/3863.added.2.md @@ -0,0 +1 @@ +- Added `LLMContextSummaryConfig` (summary generation params: `target_context_tokens`, `min_messages_after_summary`, `summarization_prompt`) and `LLMAutoContextSummarizationConfig` (auto-trigger thresholds: `max_context_tokens`, `max_unsummarized_messages`, plus a nested `summary_config`). These replace the monolithic `LLMContextSummarizationConfig`. diff --git a/changelog/3863.added.md b/changelog/3863.added.md new file mode 100644 index 000000000..d6214aed0 --- /dev/null +++ b/changelog/3863.added.md @@ -0,0 +1 @@ +- Added `LLMSummarizeContextFrame` to trigger on-demand context summarization from anywhere in the pipeline (e.g. a function call tool). Accepts an optional `config: LLMContextSummaryConfig` to override summary generation settings per request. diff --git a/changelog/3863.changed.md b/changelog/3863.changed.md new file mode 100644 index 000000000..faf5712d8 --- /dev/null +++ b/changelog/3863.changed.md @@ -0,0 +1 @@ +- ⚠️ Renamed `LLMAssistantAggregatorParams` fields: `enable_context_summarization` → `enable_auto_context_summarization` and `context_summarization_config` → `auto_context_summarization_config` (now accepts `LLMAutoContextSummarizationConfig`). The old names still work with a `DeprecationWarning` for one release cycle. 
diff --git a/changelog/3863.deprecated.md b/changelog/3863.deprecated.md new file mode 100644 index 000000000..ba2311fbd --- /dev/null +++ b/changelog/3863.deprecated.md @@ -0,0 +1 @@ +- Deprecated `LLMContextSummarizationConfig`. Use `LLMAutoContextSummarizationConfig` with a nested `LLMContextSummaryConfig` instead. The old class emits a `DeprecationWarning`. From 69414e8a5ab8b9569c4e40b17cba7d186081fbdd Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:42:23 -0300 Subject: [PATCH 153/189] Added example 54b-context-summarization-manual-openai.py demonstrating on-demand summarization triggered via a function call tool. --- ...54b-context-summarization-manual-openai.py | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 examples/foundational/54b-context-summarization-manual-openai.py diff --git a/examples/foundational/54b-context-summarization-manual-openai.py b/examples/foundational/54b-context-summarization-manual-openai.py new file mode 100644 index 000000000..e8acf4bf1 --- /dev/null +++ b/examples/foundational/54b-context-summarization-manual-openai.py @@ -0,0 +1,179 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Example demonstrating manual context summarization via a function call. + +This example shows how to trigger context summarization on demand rather than +automatically. The user can ask the bot to "summarize the conversation" and the +bot will call a function that pushes an LLMSummarizeContextFrame into the +pipeline, causing the LLM service to compress the conversation history. + +Unlike example 54, automatic summarization is NOT enabled here. Summarization +only happens when the user explicitly requests it through the function call. 
+""" + +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.frames.frames import LLMRunFrame, LLMSummarizeContextFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +load_dotenv(override=True) + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def summarize_conversation(params: FunctionCallParams): + """Trigger manual context summarization via a pipeline frame.""" + logger.info("Tool called: summarize_conversation") + await params.result_callback({"status": "summarization_requested"}) + await params.llm.queue_frame(LLMSummarizeContextFrame()) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + llm.register_function("summarize_conversation", summarize_conversation) + + summarize_function = FunctionSchema( + name="summarize_conversation", + description=( + "Summarize and compress the conversation history. " + "Call this when the user asks you to summarize the conversation " + "or when you want to free up context space." + ), + properties={}, + required=[], + ) + tools = ToolsSchema(standard_tools=[summarize_function]) + + messages = [ + { + "role": "system", + "content": ( + "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your " + "capabilities in a succinct way. Your output will be spoken aloud, so avoid " + "special characters that can't easily be spoken, such as emojis or bullet points. " + "Respond to what the user said in a creative and helpful way. " + "If the user asks you to summarize the conversation, call the " + "summarize_conversation function. 
After summarization, briefly acknowledge " + "that the conversation history has been compressed." + ), + }, + ] + + context = LLMContext(messages, tools=tools) + + # Automatic summarization is NOT enabled here (enable_auto_context_summarization + # defaults to False). The summarizer is still created internally so that + # LLMSummarizeContextFrame frames pushed via the function call are handled. + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + ), + ) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + user_aggregator, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + assistant_aggregator, # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + # Kick off the conversation. 
+ messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() From 0839e3813f658998f03ccd750b7084088caecd21 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:42:39 -0300 Subject: [PATCH 154/189] Refactoring the examples to use the new context summarization classes. --- .../54-context-summarization-openai.py | 15 +++++--- .../54a-context-summarization-google.py | 15 +++++--- ...54c-context-summarization-dedicated-llm.py | 35 +++++++++++-------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/examples/foundational/54-context-summarization-openai.py b/examples/foundational/54-context-summarization-openai.py index 45f27854f..ff6701bec 100644 --- a/examples/foundational/54-context-summarization-openai.py +++ b/examples/foundational/54-context-summarization-openai.py @@ -41,7 +41,10 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) load_dotenv(override=True) @@ -120,14 +123,16 @@ async 
def run_bot(transport: BaseTransport, runner_args: RunnerArguments): vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( - enable_context_summarization=True, + enable_auto_context_summarization=True, # Optional: customize context summarization behavior # Using low limits to demonstrate the feature quickly - context_summarization_config=LLMContextSummarizationConfig( + auto_context_summarization_config=LLMAutoContextSummarizationConfig( max_context_tokens=1000, # Trigger summarization at 1000 tokens - target_context_tokens=800, # Target context size for the summarization max_unsummarized_messages=10, # Or when 10 new messages accumulate - min_messages_after_summary=2, # Keep last 2 messages uncompressed + summary_config=LLMContextSummaryConfig( + target_context_tokens=800, # Target context size for the summarization + min_messages_after_summary=2, # Keep last 2 messages uncompressed + ), ), ), ) diff --git a/examples/foundational/54a-context-summarization-google.py b/examples/foundational/54a-context-summarization-google.py index 2ce29e959..7d2a91310 100644 --- a/examples/foundational/54a-context-summarization-google.py +++ b/examples/foundational/54a-context-summarization-google.py @@ -41,7 +41,10 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) load_dotenv(override=True) @@ -120,14 +123,16 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( - enable_context_summarization=True, + 
enable_auto_context_summarization=True, # Optional: customize context summarization behavior # Using low limits to demonstrate the feature quickly - context_summarization_config=LLMContextSummarizationConfig( + auto_context_summarization_config=LLMAutoContextSummarizationConfig( max_context_tokens=1000, # Trigger summarization at 1000 tokens - target_context_tokens=800, # Target context size for the summarization max_unsummarized_messages=10, # Or when 10 new messages accumulate - min_messages_after_summary=2, # Keep last 2 messages uncompressed + summary_config=LLMContextSummaryConfig( + target_context_tokens=800, # Target context size for the summarization + min_messages_after_summary=2, # Keep last 2 messages uncompressed + ), ), ), ) diff --git a/examples/foundational/54c-context-summarization-dedicated-llm.py b/examples/foundational/54c-context-summarization-dedicated-llm.py index 3b2195e80..1dce3890f 100644 --- a/examples/foundational/54c-context-summarization-dedicated-llm.py +++ b/examples/foundational/54c-context-summarization-dedicated-llm.py @@ -44,7 +44,10 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) load_dotenv(override=True) @@ -147,23 +150,25 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( - enable_context_summarization=True, - context_summarization_config=LLMContextSummarizationConfig( + enable_auto_context_summarization=True, + auto_context_summarization_config=LLMAutoContextSummarizationConfig( # 
Trigger thresholds (low values to demonstrate quickly) max_context_tokens=1000, max_unsummarized_messages=10, - # Summary generation - target_context_tokens=800, - min_messages_after_summary=2, - summarization_prompt=CUSTOM_SUMMARIZATION_PROMPT, - # Custom summary format - wrap in XML tags so the system - # prompt can identify summaries vs. live conversation - summary_message_template="\n{summary}\n", - # Use a dedicated cheap LLM for summarization instead of - # the primary conversation model - llm=summarization_llm, - # Cancel summarization if it takes longer than 60 seconds - summarization_timeout=60.0, + summary_config=LLMContextSummaryConfig( + # Summary generation + target_context_tokens=800, + min_messages_after_summary=2, + summarization_prompt=CUSTOM_SUMMARIZATION_PROMPT, + # Custom summary format - wrap in XML tags so the system + # prompt can identify summaries vs. live conversation + summary_message_template="\n{summary}\n", + # Use a dedicated cheap LLM for summarization instead of + # the primary conversation model + llm=summarization_llm, + # Cancel summarization if it takes longer than 60 seconds + summarization_timeout=60.0, + ), ), ), ) From d077a810ae22b4270abb0791e524298e9e99ab00 Mon Sep 17 00:00:00 2001 From: filipi87 Date: Fri, 27 Feb 2026 18:42:50 -0300 Subject: [PATCH 155/189] Fixing context summarization tests --- tests/test_context_summarization.py | 104 ++++++++++++++--- tests/test_llm_context_summarizer.py | 162 ++++++++++++++++++++++++--- 2 files changed, 232 insertions(+), 34 deletions(-) diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index ca56e7a32..10223a606 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -14,8 +14,10 @@ from pipecat.frames.frames import LLMContextSummaryRequestFrame, LLMContextSumma from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.services.llm_service import LLMService from 
pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, LLMContextSummarizationConfig, LLMContextSummarizationUtil, + LLMContextSummaryConfig, ) @@ -167,43 +169,109 @@ class TestContextSummarizationMixin(unittest.TestCase): self.assertIn("USER: First part Second part", transcript) -class TestLLMContextSummarizationConfig(unittest.TestCase): - """Tests for LLMContextSummarizationConfig.""" +class TestLLMContextSummaryConfig(unittest.TestCase): + """Tests for LLMContextSummaryConfig.""" def test_default_config(self): """Test default configuration values.""" - config = LLMContextSummarizationConfig() + config = LLMContextSummaryConfig() - self.assertEqual(config.max_context_tokens, 8000) - self.assertEqual(config.max_unsummarized_messages, 20) + self.assertEqual(config.target_context_tokens, 6000) self.assertEqual(config.min_messages_after_summary, 4) self.assertIsNone(config.summarization_prompt) def test_custom_config(self): """Test custom configuration.""" - config = LLMContextSummarizationConfig( - max_context_tokens=2500, + config = LLMContextSummaryConfig( target_context_tokens=2000, - max_unsummarized_messages=15, min_messages_after_summary=4, summarization_prompt="Custom prompt", ) - self.assertEqual(config.max_context_tokens, 2500) self.assertEqual(config.target_context_tokens, 2000) - self.assertEqual(config.max_unsummarized_messages, 15) self.assertEqual(config.min_messages_after_summary, 4) self.assertEqual(config.summary_prompt, "Custom prompt") def test_summary_prompt_property(self): """Test summary_prompt property uses default when None.""" - config = LLMContextSummarizationConfig() + config = LLMContextSummaryConfig() self.assertIn("summarizing a conversation", config.summary_prompt.lower()) - config_with_custom = LLMContextSummarizationConfig(summarization_prompt="Custom") + config_with_custom = LLMContextSummaryConfig(summarization_prompt="Custom") self.assertEqual(config_with_custom.summary_prompt, "Custom") 
+class TestLLMAutoContextSummarizationConfig(unittest.TestCase): + """Tests for LLMAutoContextSummarizationConfig.""" + + def test_default_config(self): + """Test default configuration values.""" + config = LLMAutoContextSummarizationConfig() + + self.assertEqual(config.max_context_tokens, 8000) + self.assertEqual(config.max_unsummarized_messages, 20) + self.assertEqual(config.summary_config.target_context_tokens, 6000) + self.assertEqual(config.summary_config.min_messages_after_summary, 4) + + def test_custom_config(self): + """Test custom configuration.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=2500, + max_unsummarized_messages=15, + summary_config=LLMContextSummaryConfig( + target_context_tokens=2000, + min_messages_after_summary=4, + summarization_prompt="Custom prompt", + ), + ) + + self.assertEqual(config.max_context_tokens, 2500) + self.assertEqual(config.max_unsummarized_messages, 15) + self.assertEqual(config.summary_config.target_context_tokens, 2000) + self.assertEqual(config.summary_config.min_messages_after_summary, 4) + self.assertEqual(config.summary_config.summary_prompt, "Custom prompt") + + def test_target_tokens_auto_adjusted(self): + """Test that target_context_tokens is auto-adjusted when it exceeds max.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=1000, + summary_config=LLMContextSummaryConfig(target_context_tokens=9000), + ) + self.assertLessEqual(config.summary_config.target_context_tokens, config.max_context_tokens) + + +class TestLLMContextSummarizationConfigDeprecated(unittest.TestCase): + """Tests for deprecated LLMContextSummarizationConfig.""" + + def test_emits_deprecation_warning(self): + """Test that instantiating the deprecated config emits a DeprecationWarning.""" + with self.assertWarns(DeprecationWarning): + LLMContextSummarizationConfig() + + def test_to_auto_config(self): + """Test conversion to the new LLMAutoContextSummarizationConfig.""" + import warnings + + with 
warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + old_config = LLMContextSummarizationConfig( + max_context_tokens=2500, + target_context_tokens=2000, + max_unsummarized_messages=15, + min_messages_after_summary=4, + summarization_prompt="Custom", + ) + + new_config = old_config.to_auto_config() + + self.assertIsInstance(new_config, LLMAutoContextSummarizationConfig) + self.assertEqual(new_config.max_context_tokens, 2500) + self.assertEqual(new_config.max_unsummarized_messages, 15) + self.assertEqual(new_config.summary_config.target_context_tokens, 2000) + self.assertEqual(new_config.summary_config.min_messages_after_summary, 4) + self.assertEqual(new_config.summary_config.summarization_prompt, "Custom") + + class TestFunctionCallHandling(unittest.TestCase): """Tests for function call handling in summarization.""" @@ -670,10 +738,12 @@ class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): {"role": "user", "content": f"Test message {i} that adds tokens to context."} ) - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=50, # Very low to trigger easily - llm=dedicated_llm, - summarization_timeout=5.0, + summary_config=LLMContextSummaryConfig( + llm=dedicated_llm, + summarization_timeout=5.0, + ), ) return context, config @@ -736,7 +806,7 @@ class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): dedicated_llm._generate_summary = slow_summary context, config = self._create_context_and_config(dedicated_llm) - config.summarization_timeout = 0.1 # Very short timeout + config.summary_config.summarization_timeout = 0.1 # Very short timeout summarizer = LLMContextSummarizer(context=context, config=config) await summarizer.setup(self.task_manager) @@ -826,7 +896,7 @@ class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): {"role": "user", "content": f"Test message {i} that adds tokens to context."} ) - config = 
LLMContextSummarizationConfig(max_context_tokens=50) + config = LLMAutoContextSummarizationConfig(max_context_tokens=50) summarizer = LLMContextSummarizer(context=context, config=config) await summarizer.setup(self.task_manager) diff --git a/tests/test_llm_context_summarizer.py b/tests/test_llm_context_summarizer.py index 0439d403d..7e8b326f9 100644 --- a/tests/test_llm_context_summarizer.py +++ b/tests/test_llm_context_summarizer.py @@ -12,6 +12,7 @@ from pipecat.frames.frames import ( LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame, LLMFullResponseStartFrame, + LLMSummarizeContextFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_context_summarizer import ( @@ -19,7 +20,10 @@ from pipecat.processors.aggregators.llm_context_summarizer import ( SummaryAppliedEvent, ) from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): @@ -35,7 +39,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_triggered_by_token_limit(self): """Test that summarization is triggered when token limit is reached.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=100, # Very low to trigger easily max_unsummarized_messages=100, # High so it doesn't trigger by message count ) @@ -71,7 +75,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_triggered_by_message_count(self): """Test that summarization is triggered when message count threshold is reached.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( 
max_context_tokens=100000, # Very high so it doesn't trigger by tokens max_unsummarized_messages=5, # Low to trigger easily ) @@ -101,7 +105,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_not_triggered_below_thresholds(self): """Test that summarization is not triggered when below thresholds.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=10000, max_unsummarized_messages=20, ) @@ -130,7 +134,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_in_progress_prevents_duplicate(self): """Test that a summarization in progress prevents triggering another.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=50, # Very low max_unsummarized_messages=100, ) @@ -161,7 +165,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summary_result_handling(self): """Test that summary results are processed and applied correctly.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -208,7 +215,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_interruption_cancels_summarization(self): """Test that an interruption cancels pending summarization.""" - config = LLMContextSummarizationConfig(max_context_tokens=50) + config = LLMAutoContextSummarizationConfig(max_context_tokens=50) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -238,7 +245,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def 
test_stale_summary_result_ignored(self): """Test that stale summary results are ignored.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -294,9 +304,116 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): await summarizer.cleanup() + async def test_manual_summarization_via_frame(self): + """Test that LLMSummarizeContextFrame triggers summarization on demand.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100000, # High — auto trigger would never fire + max_unsummarized_messages=100, + ) + + summarizer = LLMContextSummarizer( + context=self.context, + config=config, + auto_trigger=False, # Disable auto; only manual requests should work + ) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + # Add messages + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # Auto-trigger should NOT fire even on LLMFullResponseStartFrame + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNone(request_frame) + + # Manual trigger via LLMSummarizeContextFrame should fire + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertIsNotNone(request_frame) + self.assertIsInstance(request_frame, LLMContextSummaryRequestFrame) + + # The request must have a valid request_id and carry the current context + self.assertTrue(request_frame.request_id) + self.assertEqual(request_frame.context, self.context) + + await summarizer.cleanup() + + async def 
test_manual_summarization_with_config_override(self): + """Test that LLMSummarizeContextFrame can override default summary config.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100000, + summary_config=LLMContextSummaryConfig( + target_context_tokens=6000, + min_messages_after_summary=4, + ), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # Push a manual frame with custom config overrides + custom_config = LLMContextSummaryConfig( + target_context_tokens=500, + min_messages_after_summary=1, + ) + await summarizer.process_frame(LLMSummarizeContextFrame(config=custom_config)) + + self.assertIsNotNone(request_frame) + # The request should use the overridden values + self.assertEqual(request_frame.target_context_tokens, 500) + self.assertEqual(request_frame.min_messages_to_keep, 1) + + await summarizer.cleanup() + + async def test_manual_summarization_blocked_when_in_progress(self): + """Test that a second LLMSummarizeContextFrame is ignored while one is in progress.""" + config = LLMAutoContextSummarizationConfig(max_context_tokens=100000) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_count = 0 + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_count + request_count += 1 + + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # First manual request + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertEqual(request_count, 1) + + # Second manual request while 
first is in progress — should be ignored + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertEqual(request_count, 1) + + await summarizer.cleanup() + async def test_summary_message_role_is_user(self): """Test that the summary message uses the user role.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -335,7 +452,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summary_message_default_template(self): """Test that the default summary_message_template is used.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -377,10 +497,12 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summary_message_custom_template(self): """Test that a custom summary_message_template is applied.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=50, - min_messages_after_summary=2, - summary_message_template="\n{summary}\n", + summary_config=LLMContextSummaryConfig( + min_messages_after_summary=2, + summary_message_template="\n{summary}\n", + ), ) summarizer = LLMContextSummarizer(context=self.context, config=config) @@ -420,7 +542,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_on_summary_applied_event(self): """Test that on_summary_applied event fires with correct data.""" - config = 
LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -474,7 +599,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_on_summary_applied_not_fired_on_error(self): """Test that on_summary_applied event is NOT fired when summarization fails.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -515,9 +643,9 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_request_frame_includes_timeout(self): """Test that the request frame includes the configured summarization_timeout.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=50, - summarization_timeout=60.0, + summary_config=LLMContextSummaryConfig(summarization_timeout=60.0), ) summarizer = LLMContextSummarizer(context=self.context, config=config) From 000d38e253776b25fd8693d5fc129a02ec76a413 Mon Sep 17 00:00:00 2001 From: macaki Date: Fri, 27 Feb 2026 15:17:23 -0700 Subject: [PATCH 156/189] [Rime] Both mist and arcana now support the speedAlpha parameter. 
--- src/pipecat/services/rime/tts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index d5f97e028..2dbaf2760 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -301,6 +301,8 @@ class RimeTTSService(AudioContextTTSService): params["lang"] = self._settings.language if self._settings.segment is not None: params["segment"] = self._settings.segment + if self._settings.speedAlpha is not None: + params["speedAlpha"] = self._settings.speedAlpha if self._settings.model == "arcana": if self._settings.repetition_penalty is not None: @@ -310,8 +312,6 @@ class RimeTTSService(AudioContextTTSService): if self._settings.top_p is not None: params["top_p"] = self._settings.top_p else: # mistv2/mist - if self._settings.speedAlpha is not None: - params["speedAlpha"] = self._settings.speedAlpha if self._settings.reduceLatency is not None: params["reduceLatency"] = self._settings.reduceLatency if self._settings.pauseBetweenBrackets is not None: From 56f2564ed10da68d96675e7a482a0f8f29e26561 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Fri, 27 Feb 2026 14:45:37 -0800 Subject: [PATCH 157/189] Use local variable instead of instance variable for RTVI prepend decision Replace _rtvi_external instance variable with a local prepend_rtvi flag since it is only used during __init__ to decide whether to prepend the RTVIProcessor to the pipeline. 
--- src/pipecat/pipeline/task.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 1db23e7d4..eeb39c9b6 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -330,7 +330,7 @@ class PipelineTask(BasePipelineTask): # RTVI support self._rtvi = None - self._rtvi_external = False + prepend_rtvi = False external_rtvi = self._find_processor(pipeline, RTVIProcessor) external_observer_found = any(isinstance(o, RTVIObserver) for o in observers) @@ -350,10 +350,10 @@ class PipelineTask(BasePipelineTask): "They are both added by default, no need to add them yourself." ) self._rtvi = external_rtvi - self._rtvi_external = True elif enable_rtvi: self._rtvi = rtvi_processor or RTVIProcessor() observers.append(self._rtvi.create_rtvi_observer(params=rtvi_observer_params)) + prepend_rtvi = True if self._rtvi: # Automatically call RTVIProcessor.set_bot_ready() @@ -393,10 +393,7 @@ class PipelineTask(BasePipelineTask): # Only prepend the RTVIProcessor if we created it ourselves. When the # user already placed it inside their pipeline we must not insert it # again or it will appear twice in the frame chain. - if self._rtvi and not self._rtvi_external: - processors = [self._rtvi, pipeline] - else: - processors = [pipeline] + processors = [self._rtvi, pipeline] if prepend_rtvi else [pipeline] self._pipeline = Pipeline(processors, source=source, sink=sink) # The task observer acts as a proxy to the provided observers. 
This way, From 950a8628dca00583f3f6748c244e19fe36826ed5 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 19:49:45 -0500 Subject: [PATCH 158/189] Miscellaneous foundational example updates --- .../07c-interruptible-deepgram-flux.py | 6 +++++- .../07g-interruptible-openai-http.py | 1 - ...o-function-calling-gemini-openai-format.py | 20 +++++++++---------- ...54b-context-summarization-manual-openai.py | 9 +-------- uv.lock | 2 +- 5 files changed, 17 insertions(+), 21 deletions(-) diff --git a/examples/foundational/07c-interruptible-deepgram-flux.py b/examples/foundational/07c-interruptible-deepgram-flux.py index e51a30c1b..d2bcceaf7 100644 --- a/examples/foundational/07c-interruptible-deepgram-flux.py +++ b/examples/foundational/07c-interruptible-deepgram-flux.py @@ -10,6 +10,7 @@ import os from dotenv import load_dotenv from loguru import logger +from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -72,7 +73,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): context = LLMContext(messages) user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, - user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()), + user_params=LLMUserAggregatorParams( + user_turn_strategies=ExternalUserTurnStrategies(), + vad_analyzer=SileroVADAnalyzer(), + ), ) pipeline = Pipeline( diff --git a/examples/foundational/07g-interruptible-openai-http.py b/examples/foundational/07g-interruptible-openai-http.py index 325fd4ae4..65b2f8b9b 100644 --- a/examples/foundational/07g-interruptible-openai-http.py +++ b/examples/foundational/07g-interruptible-openai-http.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from 
pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py index c87c5278e..c3772eb2c 100644 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ b/examples/foundational/14o-function-calling-gemini-openai-format.py @@ -12,12 +12,15 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -42,20 +45,14 @@ transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), } @@ -104,17 +101,20 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = OpenAILLMContext(messages, tools) - 
context_aggregator = llm.create_context_aggregator(context) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) pipeline = Pipeline( [ transport.input(), stt, - context_aggregator.user(), + user_aggregator, llm, tts, transport.output(), - context_aggregator.assistant(), + assistant_aggregator, ] ) diff --git a/examples/foundational/54b-context-summarization-manual-openai.py b/examples/foundational/54b-context-summarization-manual-openai.py index e8acf4bf1..c1ff83ef0 100644 --- a/examples/foundational/54b-context-summarization-manual-openai.py +++ b/examples/foundational/54b-context-summarization-manual-openai.py @@ -22,9 +22,7 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame, LLMSummarizeContextFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -121,12 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # LLMSummarizeContextFrame frames pushed via the function call are handled. 
user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, - user_params=LLMUserAggregatorParams( - user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] - ), - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - ), + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), ) pipeline = Pipeline( diff --git a/uv.lock b/uv.lock index e368cfc45..a386ee81e 100644 --- a/uv.lock +++ b/uv.lock @@ -4691,7 +4691,7 @@ requires-dist = [ { name = "mlx-whisper", marker = "extra == 'mlx-whisper'", specifier = "~=0.4.2" }, { name = "nltk", specifier = ">=3.9.3,<4" }, { name = "noisereduce", marker = "extra == 'noisereduce'", specifier = "~=3.0.3" }, - { name = "numba", specifier = "==0.61.2" }, + { name = "numba", specifier = ">=0.61.2" }, { name = "numpy", specifier = ">=1.26.4,<3" }, { name = "nvidia-riva-client", marker = "extra == 'nvidia'", specifier = "~=2.21.1" }, { name = "onnxruntime", specifier = "~=1.23.2" }, From 6464230627b47225d31442cf3f95d3beb4718d50 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 22:22:35 -0500 Subject: [PATCH 159/189] fix: use pull_request_target for docs workflow to access secrets from fork PRs The update-docs workflow intermittently failed with "Input required and not supplied: token" because pull_request events from fork PRs don't have access to repository secrets. Switching to pull_request_target runs the workflow in the base repo's context, ensuring secrets are always available. This is safe since the workflow only runs on already-merged PRs. 
--- .github/workflows/update-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml index 27453e74e..a9066762d 100644 --- a/.github/workflows/update-docs.yml +++ b/.github/workflows/update-docs.yml @@ -1,7 +1,7 @@ name: Update Documentation on PR Merge on: - pull_request: + pull_request_target: types: [closed] branches: [main] paths: From 9d4955054c53d80b6e005b4bb27af453b3cdcdd0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 22:35:29 -0500 Subject: [PATCH 160/189] Fix tracing to use ServiceSettings API instead of dict access The ServiceSettings refactor (PR #3714) changed self._settings from dicts to dataclass subclasses, but tracing code still used .items(), in containment, and subscript access, causing AttributeError on every traced call. Use given_fields() for iteration and attribute access for named fields. --- changelog/3879.changed.md | 1 + .../utils/tracing/service_attributes.py | 14 ++++++++------ .../utils/tracing/service_decorators.py | 19 +++++++------------ 3 files changed, 16 insertions(+), 18 deletions(-) create mode 100644 changelog/3879.changed.md diff --git a/changelog/3879.changed.md b/changelog/3879.changed.md new file mode 100644 index 000000000..2b69f63ce --- /dev/null +++ b/changelog/3879.changed.md @@ -0,0 +1 @@ +- Updated tracing code to use `ServiceSettings` dataclass API (`given_fields()`, attribute access) instead of dict-style access (`.items()`, `in`, subscript). 
diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py index c8471a03b..97ac49d87 100644 --- a/src/pipecat/utils/tracing/service_attributes.py +++ b/src/pipecat/utils/tracing/service_attributes.py @@ -17,6 +17,8 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional if TYPE_CHECKING: from opentelemetry.trace import Span + from pipecat.services.settings import ServiceSettings + from pipecat.utils.tracing.setup import is_tracing_available if is_tracing_available(): @@ -68,7 +70,7 @@ def add_tts_span_attributes( model: str, voice_id: str, text: Optional[str] = None, - settings: Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, character_count: Optional[int] = None, operation_name: str = "tts", ttfb: Optional[float] = None, @@ -107,7 +109,7 @@ def add_tts_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) @@ -126,7 +128,7 @@ def add_stt_span_attributes( is_final: Optional[bool] = None, language: Optional[str] = None, user_id: Optional[str] = None, - settings: Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, vad_enabled: bool = False, ttfb: Optional[float] = None, **kwargs, @@ -171,7 +173,7 @@ def add_stt_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) @@ -282,7 +284,7 @@ def add_gemini_live_span_attributes( voice_id: Optional[str] = None, language: Optional[str] = None, modalities: Optional[str] = None, - settings: Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, tools: Optional[List[Dict]] = None, tools_serialized: Optional[str] = None, 
transcript: Optional[str] = None, @@ -359,7 +361,7 @@ def add_gemini_live_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) elif key == "vad" and value: diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 601cad53d..304ecb5e8 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -219,7 +219,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - tracer = trace.get_tracer("pipecat") with tracer.start_as_current_span(span_name, context=parent_context) as span: try: - settings = getattr(self, "_settings", {}) + settings = getattr(self, "_settings", None) add_tts_span_attributes( span=span, service_name=service_class_name, @@ -338,7 +338,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) - ) # Use settings from the service if available - settings = getattr(self, "_settings", {}) + settings = getattr(self, "_settings", None) add_stt_span_attributes( span=current_span, @@ -510,15 +510,10 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - # Get settings from the service params = {} if hasattr(self, "_settings"): - for key, value in self._settings.items(): - if key == "extra": - continue - # Add value directly if it's a basic type + for key, value in self._settings.given_fields().items(): if isinstance(value, (int, float, bool, str)): params[key] = value - elif value is None or ( - hasattr(value, "__name__") and value.__name__ == "NOT_GIVEN" - ): + elif value is None: params[key] = "NOT_GIVEN" # Add all available attributes to the span @@ -627,12 +622,12 @@ def traced_gemini_live(operation: str) -> Callable: model_name = _get_model_name(self) voice_id = 
getattr(self, "_voice_id", None) language_code = getattr(self, "_language_code", None) - settings = getattr(self, "_settings", {}) + settings = getattr(self, "_settings", None) # Get modalities if available modalities = None - if hasattr(self, "_settings") and "modalities" in self._settings: - modality_obj = self._settings["modalities"] + if settings and hasattr(settings, "modalities"): + modality_obj = settings.modalities if hasattr(modality_obj, "value"): modalities = modality_obj.value else: From f37fd39cdbcbe87ef4c528a2c46304c7bac3b08e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Sat, 28 Feb 2026 17:11:07 -0800 Subject: [PATCH 161/189] Add optional direction parameter to PipelineTask.queue_frame() and queue_frames() Allow pushing frames upstream through the pipeline by passing FrameDirection.UPSTREAM. Downstream frames use the existing push queue, while upstream frames are pushed directly from the pipeline sink. --- src/pipecat/pipeline/task.py | 35 ++++++++++---- src/pipecat/processors/frame_processor.py | 2 +- tests/test_pipeline.py | 57 +++++++++++++++++++++++ 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index eeb39c9b6..deae6290c 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -389,12 +389,12 @@ class PipelineTask(BasePipelineTask): # source allows us to receive and react to upstream frames, and the sink # allows us to receive and react to downstream frames. source = PipelineSource(self._source_push_frame, name=f"{self}::Source") - sink = PipelineSink(self._sink_push_frame, name=f"{self}::Sink") + self._sink = PipelineSink(self._sink_push_frame, name=f"{self}::Sink") # Only prepend the RTVIProcessor if we created it ourselves. When the # user already placed it inside their pipeline we must not insert it # again or it will appear twice in the frame chain. 
processors = [self._rtvi, pipeline] if prepend_rtvi else [pipeline] - self._pipeline = Pipeline(processors, source=source, sink=sink) + self._pipeline = Pipeline(processors, source=source, sink=self._sink) # The task observer acts as a proxy to the provided observers. This way, # we only need to pass a single observer (using the StartFrame) which @@ -625,26 +625,43 @@ class PipelineTask(BasePipelineTask): self._finished = True logger.debug(f"Pipeline task {self} has finished") - async def queue_frame(self, frame: Frame): - """Queue a single frame to be pushed down the pipeline. + async def queue_frame( + self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM + ): + """Queue a single frame to be pushed through the pipeline. + + Downstream frames are pushed from the beginning of the pipeline. + Upstream frames are pushed from the end of the pipeline. Args: frame: The frame to be processed. + direction: The direction to push the frame. Defaults to downstream. """ - await self._push_queue.put(frame) + if direction == FrameDirection.DOWNSTREAM: + await self._push_queue.put(frame) + else: + await self._sink.queue_frame(frame, direction) - async def queue_frames(self, frames: Iterable[Frame] | AsyncIterable[Frame]): - """Queues multiple frames to be pushed down the pipeline. + async def queue_frames( + self, + frames: Iterable[Frame] | AsyncIterable[Frame], + direction: FrameDirection = FrameDirection.DOWNSTREAM, + ): + """Queue multiple frames to be pushed through the pipeline. + + Downstream frames are pushed from the beginning of the pipeline. + Upstream frames are pushed from the end of the pipeline. Args: frames: An iterable or async iterable of frames to be processed. + direction: The direction to push the frames. Defaults to downstream. 
""" if isinstance(frames, AsyncIterable): async for frame in frames: - await self.queue_frame(frame) + await self.queue_frame(frame, direction) elif isinstance(frames, Iterable): for frame in frames: - await self.queue_frame(frame) + await self.queue_frame(frame, direction) async def _cancel(self, *, reason: Optional[str] = None): """Internal cancellation logic for the pipeline task. diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 3e90968fe..3e7b48442 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -965,7 +965,7 @@ class FrameProcessor(BaseObject): try: timestamp = self._clock.get_time() if self._clock else 0 if direction == FrameDirection.DOWNSTREAM and self._next: - logger.trace(f"Pushing {frame} from {self} to {self._next}") + logger.trace(f"Pushing {frame} downstream from {self} to {self._next}") if self._observer: data = FramePushed( diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 71121a3fc..04601bf14 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -292,6 +292,63 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): assert upstream_received assert downstream_received + async def test_task_queue_frame_upstream(self): + upstream_received = False + + pipeline = Pipeline([IdentityFilter()]) + task = PipelineTask(pipeline, cancel_on_idle_timeout=False) + task.set_reached_upstream_filter((TextFrame,)) + + @task.event_handler("on_frame_reached_upstream") + async def on_frame_reached_upstream(task, frame): + nonlocal upstream_received + if isinstance(frame, TextFrame) and frame.text == "Hello Upstream!": + upstream_received = True + + @task.event_handler("on_pipeline_started") + async def on_pipeline_started(task, frame): + await task.queue_frame(TextFrame(text="Hello Upstream!"), FrameDirection.UPSTREAM) + + try: + await asyncio.wait_for( + task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), + 
timeout=1.0, + ) + except asyncio.TimeoutError: + pass + + assert upstream_received + + async def test_task_queue_frames_upstream(self): + upstream_texts = [] + + pipeline = Pipeline([IdentityFilter()]) + task = PipelineTask(pipeline, cancel_on_idle_timeout=False) + task.set_reached_upstream_filter((TextFrame,)) + + @task.event_handler("on_frame_reached_upstream") + async def on_frame_reached_upstream(task, frame): + if isinstance(frame, TextFrame): + upstream_texts.append(frame.text) + + @task.event_handler("on_pipeline_started") + async def on_pipeline_started(task, frame): + await task.queue_frames( + [TextFrame(text="First"), TextFrame(text="Second")], + FrameDirection.UPSTREAM, + ) + + try: + await asyncio.wait_for( + task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), + timeout=1.0, + ) + except asyncio.TimeoutError: + pass + + assert "First" in upstream_texts + assert "Second" in upstream_texts + async def test_task_heartbeats(self): heartbeats_counter = 0 From 94a59de4e1d28f8d9af0cd82ff0344fbfe9656a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Sat, 28 Feb 2026 17:12:06 -0800 Subject: [PATCH 162/189] Add changelog for #3883 --- changelog/3883.added.md | 1 + uv.lock | 1221 +++++++++++++++++++++------------------ 2 files changed, 675 insertions(+), 547 deletions(-) create mode 100644 changelog/3883.added.md diff --git a/changelog/3883.added.md b/changelog/3883.added.md new file mode 100644 index 000000000..84360a891 --- /dev/null +++ b/changelog/3883.added.md @@ -0,0 +1 @@ +- Added optional `direction` parameter to `PipelineTask.queue_frame()` and `PipelineTask.queue_frames()`, allowing frames to be pushed upstream from the end of the pipeline. 
diff --git a/uv.lock b/uv.lock index e368cfc45..49cfa089b 100644 --- a/uv.lock +++ b/uv.lock @@ -15,7 +15,8 @@ version = "1.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "psutil" }, { name = "pyyaml" }, @@ -41,7 +42,8 @@ name = "aic-sdk" version = "2.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/68/c6/1f0b3d3d226c6d19ec654fdaea7859ee9931e0286735385b1f9ea4bcfba1/aic_sdk-2.0.1.tar.gz", hash = "sha256:2480d8398a26639ed7fb5175c37da82cf5e6b1138a1a301938cd8491fe461c20", size = 73091, upload-time = "2026-01-23T23:38:15.77Z" } wheels = [ @@ -663,7 +665,7 @@ wheels = [ [[package]] name = "camb-sdk" -version = "1.5.8" +version = "1.5.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -672,9 +674,9 @@ dependencies = [ { name = "websocket-client" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/f9/4d3f62909f62f98556e09958f40934abf226289f55a43e149dfc426dc1cf/camb_sdk-1.5.8.tar.gz", hash = "sha256:4ace563accb6aab35d2a4dce53789c98d8809a8c48806a69d0873fc8b0361300", size = 83508, upload-time = "2026-01-27T14:55:49.16Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/86/29/17527519a72ed1592f28a4d380fd50ed72978ac38148efc0f9e796504496/camb_sdk-1.5.9.tar.gz", hash = "sha256:c8daaa8eea20c94523ffddd2aa630a902932f78ea8af37e140603e52ff0025ad", size = 83521, upload-time = "2026-02-27T22:57:18.283Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/2d/e7aeef5d5f48205020d153f4a6ffb39d8971fca78b2cc64fdf0a36ceeb12/camb_sdk-1.5.8-py3-none-any.whl", hash = "sha256:7e1a4764376791ab7cccc27014cdfb691b8c73eecdcaeb01457f506ffd3425be", size = 152371, upload-time = "2026-01-27T14:55:45.637Z" }, + { url = "https://files.pythonhosted.org/packages/fc/2a/b759c32c60c51f33ceb299b52f8f73348773cd75d3177a15eefc25b2dee9/camb_sdk-1.5.9-py3-none-any.whl", hash = "sha256:8c3fe9d05adee1d8de121eb6f1ee0a37e913f072d89c11ed3399746a9b69adbc", size = 152395, upload-time = "2026-02-27T22:57:14.137Z" }, ] [[package]] @@ -714,11 +716,11 @@ wheels = [ [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, ] [[package]] @@ -942,7 +944,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } wheels = [ @@ -1015,7 +1017,7 @@ resolution-markers = [ "python_full_version == '3.11.*'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } wheels = [ @@ -1099,7 +1101,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "cattrs" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "protobuf" }, { name = "pyaml" }, @@ -1273,7 +1276,8 
@@ name = "ctranslate2" version = "4.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pyyaml" }, { name = "setuptools" }, ] @@ -1329,10 +1333,10 @@ wheels = [ [[package]] name = "cuda-pathfinder" -version = "1.3.4" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/5e/db279a3bfbd18d59d0598922a3b3c1454908d0969e8372260afec9736376/cuda_pathfinder-1.3.4-py3-none-any.whl", hash = "sha256:fb983f6e0d43af27ef486e14d5989b5f904ef45cedf40538bfdcbffa6bb01fb2", size = 30878, upload-time = "2026-02-11T18:50:31.008Z" }, + { url = "https://files.pythonhosted.org/packages/ff/60/d8f1dbfb7f06b94c662e98c95189e6f39b817da638bc8fcea0d003f89e5d/cuda_pathfinder-1.4.0-py3-none-any.whl", hash = "sha256:437079ca59e7b61ae439ecc501d69ed87b3accc34d58153ef1e54815e2c2e118", size = 38406, upload-time = "2026-02-25T22:13:00.807Z" }, ] [[package]] @@ -1573,7 +1577,7 @@ all = [ [[package]] name = "fastapi-cli" -version = "0.0.23" +version = "0.0.24" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich-toolkit" }, @@ -1581,9 +1585,9 @@ dependencies = [ { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/9f/cbd463e57de4e977b8ea0403f95347f9150441568b1d3fe3e4949ef80ef3/fastapi_cli-0.0.23.tar.gz", hash = "sha256:210ac280ea41e73aac5a57688781256beb23c2cba3a41266896fa43e6445c8e7", size = 19763, upload-time = "2026-02-16T19:45:53.358Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/58/74797ae9e4610cfa0c6b34c8309096d3b20bb29be3b8b5fbf1004d10fa5f/fastapi_cli-0.0.24.tar.gz", hash = 
"sha256:1afc9c9e21d7ebc8a3ca5e31790cd8d837742be7e4f8b9236e99cb3451f0de00", size = 19043, upload-time = "2026-02-24T10:45:10.476Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/89/19dcfd5cd289b306abdcabac68b88a4f54b7710a2c33adc16a337ecdcdfa/fastapi_cli-0.0.23-py3-none-any.whl", hash = "sha256:7e9634fc212da0b6cfc75bd3ac366cc9dfdb43b5e9ec12e58bfd1acdd2697f25", size = 12305, upload-time = "2026-02-16T19:45:52.554Z" }, + { url = "https://files.pythonhosted.org/packages/c7/4b/68f9fe268e535d79c76910519530026a4f994ce07189ac0dded45c6af825/fastapi_cli-0.0.24-py3-none-any.whl", hash = "sha256:4a1f78ed798f106b4fee85ca93b85d8fe33c0a3570f775964d37edb80b8f0edc", size = 12304, upload-time = "2026-02-24T10:45:09.552Z" }, ] [package.optional-dependencies] @@ -1594,7 +1598,7 @@ standard = [ [[package]] name = "fastapi-cloud-cli" -version = "0.13.0" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastar" }, @@ -1606,9 +1610,9 @@ dependencies = [ { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/de/0b/f07f4976784978ef159fd2e8f5c16f1f9d610578fb1fd976ff1315c11ea6/fastapi_cloud_cli-0.13.0.tar.gz", hash = "sha256:4d8f42337e8021c648f6cb0672de7d5b31b0fc7387a83d7b12f974600ac3f2fd", size = 38436, upload-time = "2026-02-17T05:18:19.033Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/eb/e78ebd05a714c62a0578cdce4339cb6cd138421a7d865fbddedd7242420b/fastapi_cloud_cli-0.14.0.tar.gz", hash = "sha256:d3ecb8c942685a71df0af7bd59f463b5eff76f5818b48e5a03c6159726831e68", size = 39822, upload-time = "2026-02-25T14:19:53.535Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/88/71a1e989d17b9edb483f32e28b7891ffdd3005271518c98ba6415987c430/fastapi_cloud_cli-0.13.0-py3-none-any.whl", hash = "sha256:874a9ed8dba34ec828f198c72de9f9a38de77ac1b15083d6bc3a4d772b0bc477", size = 27631, upload-time = "2026-02-17T05:18:18.094Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/18/7bf922ee0b6a737a9d88cf613182ecd6031f52298da893556f158eba763f/fastapi_cloud_cli-0.14.0-py3-none-any.whl", hash = "sha256:325fcb4b45e661184152da6db861d9fb718739fbcd561a4d334dbe78c026586f", size = 28350, upload-time = "2026-02-25T14:19:52.416Z" }, ] [[package]] @@ -2070,10 +2074,9 @@ wheels = [ [[package]] name = "google-genai" -version = "1.64.0" +version = "1.65.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "aiohttp" }, { name = "anyio" }, { name = "distro" }, { name = "google-auth", extra = ["requests"] }, @@ -2085,9 +2088,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bc/14/344b450d4387845fc5c8b7f168ffbe734b831b729ece3333fc0fe8556f04/google_genai-1.64.0.tar.gz", hash = "sha256:8db94ab031f745d08c45c69674d1892f7447c74ed21542abe599f7888e28b924", size = 496434, upload-time = "2026-02-19T02:06:13.95Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/f9/cc1191c2540d6a4e24609a586c4ed45d2db57cfef47931c139ee70e5874a/google_genai-1.65.0.tar.gz", hash = "sha256:d470eb600af802d58a79c7f13342d9ea0d05d965007cae8f76c7adff3d7a4750", size = 497206, upload-time = "2026-02-26T00:20:33.824Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/56/765eca90c781fedbe2a7e7dc873ef6045048e28ba5f2d4a5bcb13e13062b/google_genai-1.64.0-py3-none-any.whl", hash = "sha256:78a4d2deeb33b15ad78eaa419f6f431755e7f0e03771254f8000d70f717e940b", size = 728836, upload-time = "2026-02-19T02:06:11.655Z" }, + { url = "https://files.pythonhosted.org/packages/68/3c/3fea4e7c91357c71782d7dcaad7a2577d636c90317e003386893c25bc62c/google_genai-1.65.0-py3-none-any.whl", hash = "sha256:68c025205856919bc03edb0155c11b4b833810b7ce17ad4b7a9eeba5158f6c44", size = 724429, upload-time = "2026-02-26T00:20:32.186Z" }, ] [[package]] @@ -2111,7 +2114,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = 
"https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -2119,7 +2121,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = 
"https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -2128,7 +2129,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = 
"https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -2137,7 +2137,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -2146,7 +2145,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = 
"https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -2155,7 +2153,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = 
"https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -2310,31 +2307,34 @@ wheels = [ 
[[package]] name = "hf-xet" -version = "1.2.0" +version = "1.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/cb/9bb543bd987ffa1ee48202cc96a756951b734b79a542335c566148ade36c/hf_xet-1.3.2.tar.gz", hash = "sha256:e130ee08984783d12717444e538587fa2119385e5bd8fc2bb9f930419b73a7af", size = 643646, upload-time = "2026-02-27T17:26:08.051Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" }, - { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" }, - { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" }, - { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" }, - { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" }, - { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" }, - { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" }, - { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" }, - { url = 
"https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" }, - { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" }, - { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" }, - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, - { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, + { url = "https://files.pythonhosted.org/packages/49/75/462285971954269432aad2e7938c5c7ff9ec7d60129cec542ab37121e3d6/hf_xet-1.3.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:335a8f36c55fd35a92d0062f4e9201b4015057e62747b7e7001ffb203c0ee1d2", size = 3761019, upload-time = "2026-02-27T17:25:49.441Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8", size = 3521565, upload-time = "2026-02-27T17:25:47.469Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d", size = 4176494, upload-time = "2026-02-27T17:25:40.247Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b3/71e856bf9d9a69b3931837e8bf22e095775f268c8edcd4a9e8c355f92484/hf_xet-1.3.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c1980abfb68ecf6c1c7983379ed7b1e2b49a1aaf1a5aca9acc7d48e5e2e0a961", size = 3955601, upload-time = "2026-02-27T17:25:38.376Z" }, + { url = "https://files.pythonhosted.org/packages/63/d7/aecf97b3f0a981600a67ff4db15e2d433389d698a284bb0ea5d8fcdd6f7f/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1c88fbd90ad0d27c46b77a445f0a436ebaa94e14965c581123b68b1c52f5fd30", size = 4154770, upload-time = "2026-02-27T17:25:56.756Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508", size = 4394161, upload-time = "2026-02-27T17:25:58.111Z" }, + { url = "https://files.pythonhosted.org/packages/a1/c3/859509bade9178e21b8b1db867b8e10e9f817ab9ac1de77cb9f461ced765/hf_xet-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:31612ba0629046e425ba50375685a2586e11fb9144270ebabd75878c3eaf6378", size = 3637377, upload-time = "2026-02-27T17:26:10.611Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/7f/724cfbef4da92d577b71f68bf832961c8919f36c60d28d289a9fc9d024d4/hf_xet-1.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:433c77c9f4e132b562f37d66c9b22c05b5479f243a1f06a120c1c06ce8b1502a", size = 3497875, upload-time = "2026-02-27T17:26:09.034Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/9d54c1ae1d05fb704f977eca1671747babf1957f19f38ae75c5933bc2dc1/hf_xet-1.3.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:c34e2c7aefad15792d57067c1c89b2b02c1bbaeabd7f8456ae3d07b4bbaf4094", size = 3761076, upload-time = "2026-02-27T17:25:55.42Z" }, + { url = "https://files.pythonhosted.org/packages/f2/8a/08a24b6c6f52b5d26848c16e4b6d790bb810d1bf62c3505bed179f7032d3/hf_xet-1.3.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4bc995d6c41992831f762096020dc14a65fdf3963f86ffed580b596d04de32e3", size = 3521745, upload-time = "2026-02-27T17:25:54.217Z" }, + { url = "https://files.pythonhosted.org/packages/b5/db/a75cf400dd8a1a8acf226a12955ff6ee999f272dfc0505bafd8079a61267/hf_xet-1.3.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:959083c89dee30f7d6f890b36cdadda823386c4de63b1a30384a75bfd2ae995d", size = 4176301, upload-time = "2026-02-27T17:25:46.044Z" }, + { url = "https://files.pythonhosted.org/packages/01/40/6c4c798ffdd83e740dd3925c4e47793b07442a9efa3bc3866ba141a82365/hf_xet-1.3.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:cfa760888633b08c01b398d212ce7e8c0d7adac6c86e4b20dfb2397d8acd78ee", size = 3955437, upload-time = "2026-02-27T17:25:44.703Z" }, + { url = "https://files.pythonhosted.org/packages/0c/09/9a3aa7c5f07d3e5cc57bb750d12a124ffa72c273a87164bd848f9ac5cc14/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3155a02e083aa21fd733a7485c7c36025e49d5975c8d6bda0453d224dd0b0ac4", size = 4154535, upload-time = "2026-02-27T17:26:05.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/e0/831f7fa6d90cb47a230bc23284b502c700e1483bbe459437b3844cdc0776/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:91b1dc03c31cbf733d35dc03df7c5353686233d86af045e716f1e0ea4a2673cf", size = 4393891, upload-time = "2026-02-27T17:26:06.607Z" }, + { url = "https://files.pythonhosted.org/packages/ab/96/6ed472fdce7f8b70f5da6e3f05be76816a610063003bfd6d9cea0bbb58a3/hf_xet-1.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:211f30098512d95e85ad03ae63bd7dd2c4df476558a5095d09f9e38e78cbf674", size = 3637583, upload-time = "2026-02-27T17:26:17.349Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/a069edc4570b3f8e123c0b80fadc94530f3d7b01394e1fc1bb223339366c/hf_xet-1.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:4a6817c41de7c48ed9270da0b02849347e089c5ece9a0e72ae4f4b3a57617f82", size = 3497977, upload-time = "2026-02-27T17:26:14.966Z" }, + { url = "https://files.pythonhosted.org/packages/d8/28/dbb024e2e3907f6f3052847ca7d1a2f7a3972fafcd53ff79018977fcb3e4/hf_xet-1.3.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f93b7595f1d8fefddfede775c18b5c9256757824f7f6832930b49858483cd56f", size = 3763961, upload-time = "2026-02-27T17:25:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259", size = 3526171, upload-time = "2026-02-27T17:25:50.968Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633", size = 4180750, upload-time = "2026-02-27T17:25:43.125Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/ad/bc7f41f87173d51d0bce497b171c4ee0cbde1eed2d7b4216db5d0ada9f50/hf_xet-1.3.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:06b724a361f670ae557836e57801b82c75b534812e351a87a2c739f77d1e0635", size = 3961035, upload-time = "2026-02-27T17:25:41.837Z" }, + { url = "https://files.pythonhosted.org/packages/73/38/600f4dda40c4a33133404d9fe644f1d35ff2d9babb4d0435c646c63dd107/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:305f5489d7241a47e0458ef49334be02411d1d0f480846363c1c8084ed9916f7", size = 4161378, upload-time = "2026-02-27T17:26:00.365Z" }, + { url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e", size = 4398020, upload-time = "2026-02-27T17:26:03.977Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0b/99bfd948a3ed3620ab709276df3ad3710dcea61976918cce8706502927af/hf_xet-1.3.2-cp37-abi3-win_amd64.whl", hash = "sha256:9298b47cce6037b7045ae41482e703c471ce36b52e73e49f71226d2e8e5685a1", size = 3641624, upload-time = "2026-02-27T17:26:13.542Z" }, + { url = "https://files.pythonhosted.org/packages/cc/02/9a6e4ca1f3f73a164c0cd48e41b3cc56585dcc37e809250de443d673266f/hf_xet-1.3.2-cp37-abi3-win_arm64.whl", hash = "sha256:83d8ec273136171431833a6957e8f3af496bee227a0fe47c7b8b39c106d1749a", size = 3503976, upload-time = "2026-02-27T17:26:12.123Z" }, ] [[package]] @@ -2473,7 +2473,7 @@ wheels = [ [[package]] name = "hume" -version = "0.13.8" +version = "0.13.10" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -2485,9 +2485,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/5b/849ac072161e985ce5758f19f792043274b64a9f9dd73fdd14333b7446f4/hume-0.13.8.tar.gz", hash = 
"sha256:067691b0ce0353e4438d32d5fbfcbb6ed2099533bf5e06af99084c8c76fad24f", size = 142326, upload-time = "2026-02-10T16:05:22.234Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/d7/97845c3903ef5782b6f4581138f06a595513c2e129b2cbeacfc6e3645f61/hume-0.13.10.tar.gz", hash = "sha256:425596d17bd8b85bdf4f27bd0d3680c50ce50b4339f64adf39f69557907dc41c", size = 144063, upload-time = "2026-02-27T21:06:17.913Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/10/ec2c1e9a0401a39c3575ff8c5e42ad4b03687d5dbdefaa94ec5d52dbe088/hume-0.13.8-py3-none-any.whl", hash = "sha256:8295c095e4e04918512eec2df3adf4a0900b8d7ef06e3e8487c45ab520ed0ad5", size = 353023, upload-time = "2026-02-10T16:05:20.537Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ee/52598b811660f874f84b880b2b46481c78f8f7df2d9cff95b8130af95826/hume-0.13.10-py3-none-any.whl", hash = "sha256:a724b6cd9fc2278dff0b831276b1b2c82604edece3e036e0d46c312aea2d70b8", size = 355071, upload-time = "2026-02-27T21:06:14.847Z" }, ] [[package]] @@ -2528,93 +2528,93 @@ wheels = [ [[package]] name = "ijson" -version = "3.4.0.post0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/30/7ab4b9e88e7946f6beef419f74edcc541df3ea562c7882257b4eaa82417d/ijson-3.4.0.post0.tar.gz", hash = "sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e", size = 67216, upload-time = "2025-10-10T05:29:25.62Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/15/4f4921ed9ab94032fd0b03ecb211ff9dbd5cc9953463f5b5c4ddeab406fc/ijson-3.4.0.post0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:8f904a405b58a04b6ef0425f1babbc5c65feb66b0a4cc7f214d4ad7de106f77d", size = 88244, upload-time = "2025-10-10T05:27:42.001Z" }, - { url = "https://files.pythonhosted.org/packages/af/d6/b85d4da1752362a789bc3e0fc4b55e812a374a50d2fe1c06cab2e2bcb170/ijson-3.4.0.post0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a07dcc1a8a1ddd76131a7c7528cbd12951c2e34eb3c3d63697b905069a2d65b1", size = 59880, upload-time = "2025-10-10T05:27:44.791Z" }, - { url = "https://files.pythonhosted.org/packages/c3/96/e1027e6d0efb5b9192bdc9f0af5633c20a56999cce4cf7ad35427f823138/ijson-3.4.0.post0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab3be841b8c430c1883b8c0775eb551f21b5500c102c7ee828afa35ddd701bdd", size = 59939, upload-time = "2025-10-10T05:27:45.66Z" }, - { url = "https://files.pythonhosted.org/packages/e3/71/b9ca0a19afb2f36be35c6afa2c4d1c19950dc45f6a50b483b56082b3e165/ijson-3.4.0.post0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:43059ae0d657b11c5ddb11d149bc400c44f9e514fb8663057e9b2ea4d8d44c1f", size = 125894, upload-time = "2025-10-10T05:27:46.551Z" }, - { url = "https://files.pythonhosted.org/packages/02/1b/f7356de078d85564829c5e2a2a31473ee0ad1876258ceecf550b582e57b7/ijson-3.4.0.post0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d3e82963096579d1385c06b2559570d7191e225664b7fa049617da838e1a4a4", size = 132385, upload-time = "2025-10-10T05:27:48Z" }, - { url = "https://files.pythonhosted.org/packages/57/7b/08f86eed5df0849b673260dd2943b6a7367a55b5a4b6e73ddbfbdf4206f1/ijson-3.4.0.post0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:461ce4e87a21a261b60c0a68a2ad17c7dd214f0b90a0bec7e559a66b6ae3bd7e", size = 129567, upload-time = "2025-10-10T05:27:49.188Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/e1/69672d95b1a16e7c6bf89cef6c892b228cc84b484945a731786a425700d2/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:890cf6610c9554efcb9765a93e368efeb5bb6135f59ce0828d92eaefff07fde5", size = 132821, upload-time = "2025-10-10T05:27:50.342Z" }, - { url = "https://files.pythonhosted.org/packages/0b/15/9ed4868e2e92db2454508f7ea1282bec0b039bd344ac0cbac4a2de16786d/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6793c29a5728e7751a7df01be58ba7da9b9690c12bf79d32094c70a908fa02b9", size = 127757, upload-time = "2025-10-10T05:27:51.203Z" }, - { url = "https://files.pythonhosted.org/packages/5b/aa/08a308d3aaa6e98511f3100f8a1e4e8ff8c853fa4ec3f18b71094ac36bbe/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a56b6674d7feec0401c91f86c376f4e3d8ff8129128a8ad21ca43ec0b1242f79", size = 130439, upload-time = "2025-10-10T05:27:52.123Z" }, - { url = "https://files.pythonhosted.org/packages/56/46/3da05a044f335b97635d59eede016ea158fbf1b59e584149177b6524e1e5/ijson-3.4.0.post0-cp310-cp310-win32.whl", hash = "sha256:01767fcbd75a5fa5a626069787b41f04681216b798510d5f63bcf66884386368", size = 52004, upload-time = "2025-10-10T05:27:53.441Z" }, - { url = "https://files.pythonhosted.org/packages/60/d7/a126d58f379df16fa9a0c2532ac00ae3debf1d28c090020775bc735032b8/ijson-3.4.0.post0-cp310-cp310-win_amd64.whl", hash = "sha256:09127c06e5dec753feb9e4b8c5f6a23603d1cd672d098159a17e53a73b898eec", size = 54407, upload-time = "2025-10-10T05:27:54.259Z" }, - { url = "https://files.pythonhosted.org/packages/a7/ac/3d57249d4acba66a33eaef794edb5b2a2222ca449ae08800f8abe9286645/ijson-3.4.0.post0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0b473112e72c0c506da425da3278367b6680f340ecc093084693a1e819d28435", size = 88278, upload-time = "2025-10-10T05:27:55.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/fb/2d068d23d1a665f500282ceb6f2473952a95fc7107d739fd629b4ab41959/ijson-3.4.0.post0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:043f9b7cf9cc744263a78175e769947733710d2412d25180df44b1086b23ebd5", size = 59898, upload-time = "2025-10-10T05:27:56.361Z" }, - { url = "https://files.pythonhosted.org/packages/26/3d/8b14589dfb0e5dbb7bcf9063e53d3617c041cf315ff3dfa60945382237ce/ijson-3.4.0.post0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b55e49045f4c8031f3673f56662fd828dc9e8d65bd3b03a9420dda0d370e64ba", size = 59945, upload-time = "2025-10-10T05:27:57.581Z" }, - { url = "https://files.pythonhosted.org/packages/77/57/086a75094397d4b7584698a540a279689e12905271af78cdfc903bf9eaf8/ijson-3.4.0.post0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:11f13b73194ea2a5a8b4a2863f25b0b4624311f10db3a75747b510c4958179b0", size = 131318, upload-time = "2025-10-10T05:27:58.453Z" }, - { url = "https://files.pythonhosted.org/packages/df/35/7f61e9ce4a9ff1306ec581eb851f8a660439126d92ee595c6dc8084aac97/ijson-3.4.0.post0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:659acb2843433e080c271ecedf7d19c71adde1ee5274fc7faa2fec0a793f9f1c", size = 137990, upload-time = "2025-10-10T05:27:59.328Z" }, - { url = "https://files.pythonhosted.org/packages/59/bf/590bbc3c3566adce5e2f43ba5894520cbaf19a3e7f38c1250926ba67eee4/ijson-3.4.0.post0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:deda4cfcaafa72ca3fa845350045b1d0fef9364ec9f413241bb46988afbe6ee6", size = 134416, upload-time = "2025-10-10T05:28:00.317Z" }, - { url = "https://files.pythonhosted.org/packages/24/c1/fb719049851979df71f3e039d6f1a565d349c9cb1b29c0f8775d9db141b4/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47352563e8c594360bacee2e0753e97025f0861234722d02faace62b1b6d2b2a", size = 138034, upload-time = 
"2025-10-10T05:28:01.627Z" }, - { url = "https://files.pythonhosted.org/packages/10/ce/ccda891f572876aaf2c43f0b2079e31d5b476c3ae53196187eab1a788eff/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5a48b9486242d1295abe7fd0fbb6308867da5ca3f69b55c77922a93c2b6847aa", size = 132510, upload-time = "2025-10-10T05:28:03.141Z" }, - { url = "https://files.pythonhosted.org/packages/11/b5/ca8e64ab7cf5252f358e467be767630f085b5bbcd3c04333a3a5f36c3dd3/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9c0886234d1fae15cf4581a430bdba03d79251c1ab3b07e30aa31b13ef28d01c", size = 134907, upload-time = "2025-10-10T05:28:04.438Z" }, - { url = "https://files.pythonhosted.org/packages/93/14/63a4d5dc548690f29f0c2fc9cabd5ecbb37532547439c05f5b3b9ce73021/ijson-3.4.0.post0-cp311-cp311-win32.whl", hash = "sha256:fecae19b5187d92900c73debb3a979b0b3290a53f85df1f8f3c5ba7d1e9fb9cb", size = 52006, upload-time = "2025-10-10T05:28:05.424Z" }, - { url = "https://files.pythonhosted.org/packages/fa/bf/932740899e572a97f9be0c6cd64ebda557eae7701ac216fc284aba21786d/ijson-3.4.0.post0-cp311-cp311-win_amd64.whl", hash = "sha256:b39dbf87071f23a23c8077eea2ae7cfeeca9ff9ffec722dfc8b5f352e4dd729c", size = 54410, upload-time = "2025-10-10T05:28:06.264Z" }, - { url = "https://files.pythonhosted.org/packages/7d/fe/3b6af0025288e769dbfa30485dae1b3bd3f33f00390f3ee532cbb1c33e9b/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9", size = 87847, upload-time = "2025-10-10T05:28:07.229Z" }, - { url = "https://files.pythonhosted.org/packages/6e/a5/95ee2ca82f3b1a57892452f6e5087607d56c620beb8ce625475194568698/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0", size = 59815, upload-time = "2025-10-10T05:28:08.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/8d/5a704ab3c17c55c21c86423458db8610626ca99cc9086a74dfeb7ee9054c/ijson-3.4.0.post0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7", size = 59648, upload-time = "2025-10-10T05:28:09.307Z" }, - { url = "https://files.pythonhosted.org/packages/25/56/ca5d6ca145d007f30b44e747f3c163bc08710ce004af0deaad4a2301339b/ijson-3.4.0.post0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c", size = 138279, upload-time = "2025-10-10T05:28:10.489Z" }, - { url = "https://files.pythonhosted.org/packages/c3/d3/22e3cc806fcdda7ad4c8482ed74db7a017d4a1d49b4300c7bc07052fb561/ijson-3.4.0.post0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e", size = 149110, upload-time = "2025-10-10T05:28:12.263Z" }, - { url = "https://files.pythonhosted.org/packages/3e/04/efb30f413648b9267f5a33920ac124d7ebef3bc4063af8f6ffc8ca11ddcb/ijson-3.4.0.post0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b", size = 149026, upload-time = "2025-10-10T05:28:13.557Z" }, - { url = "https://files.pythonhosted.org/packages/2d/cf/481165f7046ade32488719300a3994a437020bc41cfbb54334356348f513/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f", size = 150012, upload-time = "2025-10-10T05:28:14.859Z" }, - { url = "https://files.pythonhosted.org/packages/0f/24/642e3289917ecf860386e26dfde775f9962d26ab7f6c2e364ed3ca3c25d8/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec", size = 142193, upload-time = 
"2025-10-10T05:28:16.131Z" }, - { url = "https://files.pythonhosted.org/packages/0f/f5/fd2f038abe95e553e1c3ee207cda19db9196eb416e63c7c89699a8cf0db7/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d", size = 150904, upload-time = "2025-10-10T05:28:17.401Z" }, - { url = "https://files.pythonhosted.org/packages/49/35/24259d22519987928164e6cb8fe3486e1df0899b2999ada4b0498639b463/ijson-3.4.0.post0-cp312-cp312-win32.whl", hash = "sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab", size = 52358, upload-time = "2025-10-10T05:28:18.315Z" }, - { url = "https://files.pythonhosted.org/packages/a1/2b/6f7ade27a8ff5758fc41006dadd2de01730def84fe3e60553b329c59e0d4/ijson-3.4.0.post0-cp312-cp312-win_amd64.whl", hash = "sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac", size = 54789, upload-time = "2025-10-10T05:28:19.552Z" }, - { url = "https://files.pythonhosted.org/packages/1b/20/aaec6977f9d538bbadd760c7fa0f6a0937742abdcc920ec6478a8576e55f/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:114ed248166ac06377e87a245a158d6b98019d2bdd3bb93995718e0bd996154f", size = 87863, upload-time = "2025-10-10T05:28:20.786Z" }, - { url = "https://files.pythonhosted.org/packages/5b/29/06bf56a866e2fe21453a1ad8f3a5d7bca3c723f73d96329656dfee969783/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffb21203736b08fe27cb30df6a4f802fafb9ef7646c5ff7ef79569b63ea76c57", size = 59806, upload-time = "2025-10-10T05:28:21.596Z" }, - { url = "https://files.pythonhosted.org/packages/ba/ae/e1d0fda91ba7a444b75f0d60cb845fdb1f55d3111351529dcbf4b1c276fe/ijson-3.4.0.post0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:07f20ecd748602ac7f18c617637e53bd73ded7f3b22260bba3abe401a7fc284e", size = 59643, upload-time = "2025-10-10T05:28:22.45Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/24/5a24533be2726396cc1724dc237bada09b19715b5bfb0e7b9400db0901ad/ijson-3.4.0.post0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:27aa193d47ffc6bc4e45453896ad98fb089a367e8283b973f1fe5c0198b60b4e", size = 138082, upload-time = "2025-10-10T05:28:23.319Z" }, - { url = "https://files.pythonhosted.org/packages/05/60/026c3efcec23c329657e878cbc0a9a25b42e7eb3971e8c2377cb3284e2b7/ijson-3.4.0.post0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ccddb2894eb7af162ba43b9475ac5825d15d568832f82eb8783036e5d2aebd42", size = 149145, upload-time = "2025-10-10T05:28:24.279Z" }, - { url = "https://files.pythonhosted.org/packages/ed/c2/036499909b7a1bc0bcd85305e4348ad171aeb9df57581287533bdb3497e9/ijson-3.4.0.post0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61ab0b8c5bf707201dc67e02c116f4b6545c4afd7feb2264b989d242d9c4348a", size = 149046, upload-time = "2025-10-10T05:28:25.186Z" }, - { url = "https://files.pythonhosted.org/packages/ba/75/e7736073ad96867c129f9e799e3e65086badd89dbf3911f76d9b3bf8a115/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:254cfb8c124af68327a0e7a49b50bbdacafd87c4690a3d62c96eb01020a685ef", size = 150356, upload-time = "2025-10-10T05:28:26.135Z" }, - { url = "https://files.pythonhosted.org/packages/9d/1b/1c1575d2cda136985561fcf774fe6c54412cd0fa08005342015af0403193/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:04ac9ca54db20f82aeda6379b5f4f6112fdb150d09ebce04affeab98a17b4ed3", size = 142322, upload-time = "2025-10-10T05:28:27.125Z" }, - { url = "https://files.pythonhosted.org/packages/28/4d/aba9871feb624df8494435d1a9ddc7b6a4f782c6044bfc0d770a4b59f145/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a603d7474bf35e7b3a8e49c8dabfc4751841931301adff3f3318171c4e407f32", size = 151386, upload-time = 
"2025-10-10T05:28:28.274Z" }, - { url = "https://files.pythonhosted.org/packages/3f/9a/791baa83895fb6e492bce2c7a0ea6427b6a41fe854349e62a37d0c9deaf0/ijson-3.4.0.post0-cp313-cp313-win32.whl", hash = "sha256:ec5bb1520cb212ebead7dba048bb9b70552c3440584f83b01b0abc96862e2a09", size = 52352, upload-time = "2025-10-10T05:28:29.191Z" }, - { url = "https://files.pythonhosted.org/packages/a9/0c/061f51493e1da21116d74ee8f6a6b9ae06ca5fa2eb53c3b38b64f9a9a5ae/ijson-3.4.0.post0-cp313-cp313-win_amd64.whl", hash = "sha256:3505dff18bdeb8b171eb28af6df34857e2be80dc01e2e3b624e77215ad58897f", size = 54783, upload-time = "2025-10-10T05:28:30.048Z" }, - { url = "https://files.pythonhosted.org/packages/c7/89/4344e176f2c5f5ef3251c9bfa4ddd5b4cf3f9601fd6ec3f677a3ba0b9c71/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:45a0b1c833ed2620eaf8da958f06ac8351c59e5e470e078400d23814670ed708", size = 92342, upload-time = "2025-10-10T05:28:31.389Z" }, - { url = "https://files.pythonhosted.org/packages/d4/b1/85012c586a6645f9fb8bfa3ef62ed2f303c8d73fc7c2f705111582925980/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7809ec8c8f40228edaaa089f33e811dff4c5b8509702652870d3f286c9682e27", size = 62028, upload-time = "2025-10-10T05:28:32.849Z" }, - { url = "https://files.pythonhosted.org/packages/65/ea/7b7e2815c101d78b33e74d64ddb70cccc377afccd5dda76e566ed3fcb56f/ijson-3.4.0.post0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cf4a34c2cfe852aee75c89c05b0a4531c49dc0be27eeed221afd6fbf9c3e149c", size = 61773, upload-time = "2025-10-10T05:28:34.016Z" }, - { url = "https://files.pythonhosted.org/packages/59/7d/2175e599cb77a64f528629bad3ce95dfdf2aa6171d313c1fc00bbfaf0d22/ijson-3.4.0.post0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a39d5d36067604b26b78de70b8951c90e9272450642661fe531a8f7a6936a7fa", size = 198562, upload-time = "2025-10-10T05:28:34.878Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/97/82247c501c92405bb2fc44ab5efb497335bcb9cf0f5d3a0b04a800737bd8/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83fc738d81c9ea686b452996110b8a6678296c481e0546857db24785bff8da92", size = 216212, upload-time = "2025-10-10T05:28:36.208Z" }, - { url = "https://files.pythonhosted.org/packages/95/ca/b956f507bb02e05ce109fd11ab6a2c054f8b686cc5affe41afe50630984d/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b2a81aee91633868f5b40280e2523f7c5392e920a5082f47c5e991e516b483f6", size = 206618, upload-time = "2025-10-10T05:28:37.243Z" }, - { url = "https://files.pythonhosted.org/packages/3e/12/e827840ab81d86a9882e499097934df53294f05155f1acfcb9a211ac1142/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:56169e298c5a2e7196aaa55da78ddc2415876a74fe6304f81b1eb0d3273346f7", size = 210689, upload-time = "2025-10-10T05:28:38.252Z" }, - { url = "https://files.pythonhosted.org/packages/1b/3b/59238d9422c31a4aefa22ebeb8e599e706158a0ab03669ef623be77a499a/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eeb9540f0b1a575cbb5968166706946458f98c16e7accc6f2fe71efa29864241", size = 199927, upload-time = "2025-10-10T05:28:39.233Z" }, - { url = "https://files.pythonhosted.org/packages/b6/0f/ec01c36c128c37edb8a5ae8f3de3256009f886338d459210dfe121ee4ba9/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ba3478ff0bb49d7ba88783f491a99b6e3fa929c930ab062d2bb7837e6a38fe88", size = 204455, upload-time = "2025-10-10T05:28:40.644Z" }, - { url = "https://files.pythonhosted.org/packages/c8/cf/5560e1db96c6d10a5313be76bf5a1754266cbfb5cc13ff64d107829e07b1/ijson-3.4.0.post0-cp313-cp313t-win32.whl", hash = "sha256:b005ce84e82f28b00bf777a464833465dfe3efa43a0a26c77b5ac40723e1a728", size = 54566, upload-time = "2025-10-10T05:28:41.663Z" }, - { url = 
"https://files.pythonhosted.org/packages/22/5a/cbb69144c3b25dd56f5421ff7dc0cf3051355579062024772518e4f4b3c5/ijson-3.4.0.post0-cp313-cp313t-win_amd64.whl", hash = "sha256:fe9c84c9b1c8798afa407be1cea1603401d99bfc7c34497e19f4f5e5ddc9b441", size = 57298, upload-time = "2025-10-10T05:28:42.881Z" }, - { url = "https://files.pythonhosted.org/packages/af/0b/a4ce8524fd850302bbf5d9f38d07c0fa981fdbe44951d2fcd036935b67dd/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da6a21b88cbf5ecbc53371283988d22c9643aa71ae2873bbeaefd2dea3b6160b", size = 88361, upload-time = "2025-10-10T05:28:43.73Z" }, - { url = "https://files.pythonhosted.org/packages/be/90/a5e5f33e46f28174a9c8142d12dcb3d26ce358d9a2230b9b15f5c987b3a5/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cf24a48a1c3ca9d44a04feb59ccefeb9aa52bb49b9cb70ad30518c25cce74bb7", size = 59960, upload-time = "2025-10-10T05:28:44.585Z" }, - { url = "https://files.pythonhosted.org/packages/83/e2/551dd7037dda759aa0ce53f0d3d7be03b03c6b05c0b0a5d5ab7a47e6b4b1/ijson-3.4.0.post0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d14427d366f95f21adcb97d0ed1f6d30f6fdc04d0aa1e4de839152c50c2b8d65", size = 59957, upload-time = "2025-10-10T05:28:45.748Z" }, - { url = "https://files.pythonhosted.org/packages/ac/b9/3006384f85cc26cf83dbbd542d362cc336f1e1ddd491e32147cfa46ea8ae/ijson-3.4.0.post0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339d49f6c5d24051c85d9226be96d2d56e633cb8b7d09dd8099de8d8b51a97e2", size = 139967, upload-time = "2025-10-10T05:28:47.229Z" }, - { url = "https://files.pythonhosted.org/packages/77/3b/b5234add8115cbfe8635b6c152fb527327f45e4c0f0bf2e93844b36b5217/ijson-3.4.0.post0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7206afcb396aaef66c2b066997b4e9d9042c4b7d777f4d994e9cec6d322c2fe6", size = 149196, upload-time = "2025-10-10T05:28:48.226Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/d2/c4ae543e37d7a9fba09740c221976a63705dbad23a9cda9022fc9fa0f3de/ijson-3.4.0.post0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8dd327da225887194fe8b93f2b3c9c256353e14a6b9eefc940ed17fde38f5b8", size = 148516, upload-time = "2025-10-10T05:28:49.237Z" }, - { url = "https://files.pythonhosted.org/packages/0d/a1/914b5fb1c26af2474cd04841626e0e95576499a4ca940661fb105ee12dd2/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4810546e66128af51fd4a0c9a640e84e8508e9c15c4f247d8a3e3253b20e1465", size = 149770, upload-time = "2025-10-10T05:28:50.501Z" }, - { url = "https://files.pythonhosted.org/packages/7a/c1/51c3584102d0d85d4aa10cc88dbbe431ecb9fe98160a9e2fad62a4456aed/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:103a0838061297d063bca81d724b0958b616f372bd893bbc278320152252c652", size = 143688, upload-time = "2025-10-10T05:28:51.823Z" }, - { url = "https://files.pythonhosted.org/packages/47/3d/a54f13d766332620bded8ee76bcdd274509ecc53cf99573450f95b3ad910/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:40007c977e230e04118b27322f25a72ae342a3d61464b2057fcd9b21eeb7427a", size = 150688, upload-time = "2025-10-10T05:28:52.757Z" }, - { url = "https://files.pythonhosted.org/packages/72/49/43d97cccf3266da7c044bd42e5083340ad1fd97fbb16d1bcd6791fd8918f/ijson-3.4.0.post0-cp314-cp314-win32.whl", hash = "sha256:f932969fc1fd4449ca141cf5f47ff357656a154a361f28d9ebca0badc5b02297", size = 52882, upload-time = "2025-10-10T05:28:53.708Z" }, - { url = "https://files.pythonhosted.org/packages/e9/f0/008f1ed4e0fc6f6dc7a5a82ecf08a59bb212514e158954374d440d700e6c/ijson-3.4.0.post0-cp314-cp314-win_amd64.whl", hash = "sha256:3ed19b1e4349240773a8ce4a4bfa450892d4a57949c02c515cd6be5a46b7696a", size = 55568, upload-time = "2025-10-10T05:28:54.79Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/1c/8a199fded709e762aced89bb7086973c837e432dd714bbad78a6ac789c23/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:226447e40ca9340a39ed07d68ea02ee14b52cb4fe649425b256c1f0073531c83", size = 92345, upload-time = "2025-10-10T05:28:55.657Z" }, - { url = "https://files.pythonhosted.org/packages/be/60/04e97f6a403203bd2eb8849570bdce5719d696b5fb96aa2a62566fe7a1d9/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c88f0669d45d4b1aa017c9b68d378e7cd15d188dfb6f0209adc78b7f45590a7", size = 62029, upload-time = "2025-10-10T05:28:56.561Z" }, - { url = "https://files.pythonhosted.org/packages/2a/97/e88295f9456ba939d90d4603af28fcabda3b443ef55e709e9381df3daa58/ijson-3.4.0.post0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:56b3089dc28c12492d92cc4896d2be585a89ecae34e25d08c1df88f21815cb50", size = 61776, upload-time = "2025-10-10T05:28:57.401Z" }, - { url = "https://files.pythonhosted.org/packages/1b/9f/0e9c236e720c2de887ab0d7cad8a15d2aa55fb449f792437fc99899957a9/ijson-3.4.0.post0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c117321cfa7b749cc1213f9b4c80dc958f0a206df98ec038ae4bcbbdb8463a15", size = 199808, upload-time = "2025-10-10T05:28:58.62Z" }, - { url = "https://files.pythonhosted.org/packages/0e/70/c21de30e7013e074924cd82057acfc5760e7b2cc41180f80770621b0ad36/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8311f48db6a33116db5c81682f08b6e2405501a4b4e460193ae69fec3cd1f87a", size = 217152, upload-time = "2025-10-10T05:28:59.656Z" }, - { url = "https://files.pythonhosted.org/packages/64/78/63a0bcc0707037df4e22bb836451279d850592258c859685a402c27f5d6d/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91c61a3e63e04da648737e6b4abd537df1b46fb8cdf3219b072e790bb3c1a46b", size = 207663, upload-time = 
"2025-10-10T05:29:00.73Z" }, - { url = "https://files.pythonhosted.org/packages/7d/85/834e9838d69893cb7567e1210be044444213c78f7414aaf1cd241df16078/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1709171023ce82651b2f132575c2e6282e47f64ad67bd3260da476418d0e7895", size = 211157, upload-time = "2025-10-10T05:29:01.87Z" }, - { url = "https://files.pythonhosted.org/packages/2e/9b/9fda503799ebc30397710552e5dedc1d98d9ea6a694e5717415892623a94/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5f0a72b1e3c0f78551670c12b2fdc1bf05f2796254d9c2055ba319bec2216020", size = 200231, upload-time = "2025-10-10T05:29:02.883Z" }, - { url = "https://files.pythonhosted.org/packages/15/f3/6419d1d5795a16591233d3aa3747b084e82c0c1d7184bdad9be638174560/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b982a3597b0439ce9c8f4cfc929d86c6ed43907908be1e8463a34dc35fe5b258", size = 204825, upload-time = "2025-10-10T05:29:04.242Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8d/a520e6902129c55fa94428ea0a22e8547540d5e7ca30f18b39594a5feea2/ijson-3.4.0.post0-cp314-cp314t-win32.whl", hash = "sha256:4e39bfdc36b0b460ef15a06550a6a385c64c81f7ac205ccff39bd45147918912", size = 55559, upload-time = "2025-10-10T05:29:05.681Z" }, - { url = "https://files.pythonhosted.org/packages/20/67/0ac6dd0045957ba1270b7b1860864f7d8cea4062e70b1083134c587e5768/ijson-3.4.0.post0-cp314-cp314t-win_amd64.whl", hash = "sha256:17e45262a5ddef39894013fb1548ee7094e444c8389eb1a97f86708b19bea03e", size = 58238, upload-time = "2025-10-10T05:29:06.656Z" }, - { url = "https://files.pythonhosted.org/packages/43/66/27cfcea16e85b95e33814eae2052dab187206b8820cdd90aa39d32ffb441/ijson-3.4.0.post0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:add9242f886eae844a7410b84aee2bbb8bdc83c624f227cb1fdb2d0476a96cb1", size = 57029, upload-time = "2025-10-10T05:29:19.733Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/1b/df3f1561c6629241fb2f8bd7ea1da14e3c2dd16fe9d7cbc97120870ed09c/ijson-3.4.0.post0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:69718ed41710dfcaa7564b0af42abc05875d4f7aaa24627c808867ef32634bc7", size = 56523, upload-time = "2025-10-10T05:29:20.641Z" }, - { url = "https://files.pythonhosted.org/packages/39/0a/6c6a3221ddecf62b696fde0e864415237e05b9a36ab6685a606b8fb3b5a2/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:636b6eca96c6c43c04629c6b37fad0181662eaacf9877c71c698485637f752f9", size = 70546, upload-time = "2025-10-10T05:29:21.526Z" }, - { url = "https://files.pythonhosted.org/packages/42/cb/edf69755e86a3a9f8b418efd60239cb308af46c7c8e12f869423f51c9851/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb5e73028f6e63d27b3d286069fe350ed80a4ccc493b022b590fea4bb086710d", size = 70532, upload-time = "2025-10-10T05:29:22.718Z" }, - { url = "https://files.pythonhosted.org/packages/96/7e/c8730ea39b8712622cd5a1bdff676098208400e37bb92052ba52f93e2aa1/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:461acf4320219459dabe5ed90a45cb86c9ba8cc6d6db9dad0d9427d42f57794c", size = 67927, upload-time = "2025-10-10T05:29:23.596Z" }, - { url = "https://files.pythonhosted.org/packages/ec/f2/53b6e9bdd2a91202066764eaa74b572ba4dede0fe47a5a26f4de34b7541a/ijson-3.4.0.post0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a0fedf09c0f6ffa2a99e7e7fd9c5f3caf74e655c1ee015a0797383e99382ebc3", size = 54657, upload-time = "2025-10-10T05:29:24.482Z" }, + { url = "https://files.pythonhosted.org/packages/6e/32/21c1b47a1afb7319944d0b9685c0997a9d574a77b030c82f6a1ac2cef4eb/ijson-3.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea8dcac10d86adaeead454bc25c97b68d0bda573d5fd6f86f5e21cf8f7906f88", size = 88935, upload-time = 
"2026-02-24T03:56:40.591Z" }, + { url = "https://files.pythonhosted.org/packages/86/f7/6ac7ebbb3cd767c87cdcbb950a6754afd1c0977756347bfe03eb8e5b866d/ijson-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:92b0495bbb2150bbf14fc5d98fb6d76bcd1c526605a172709e602e6fedc96495", size = 60567, upload-time = "2026-02-24T03:56:41.919Z" }, + { url = "https://files.pythonhosted.org/packages/c4/98/1140de9ae872468a8bc2e87c171228e25e58b1eb696b7fb430f7590fea44/ijson-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af0c4c8943be8b09a4e57bdc1da6001dae7b36526d4154fe5c8224738d0921f", size = 60620, upload-time = "2026-02-24T03:56:42.764Z" }, + { url = "https://files.pythonhosted.org/packages/60/e1/67dfe0774e4c7ca6ec8702e280e8764d356f3db54358999818cda6df7679/ijson-3.5.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:45887d5e84ff0d2b138c926cebd9071830733968afe8d9d12080b3c178c7f918", size = 126558, upload-time = "2026-02-24T03:56:43.922Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ef/23d614fc773d428caeb6e197218b7e32adcc668ff5b98777039149571208/ijson-3.5.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9a70b575be8e57a28c80e90ed349ad3a851c3478524c70e36e07d6092ecd12c9", size = 133091, upload-time = "2026-02-24T03:56:45.291Z" }, + { url = "https://files.pythonhosted.org/packages/b8/80/99727603cd8a1d32edafa4392f4056b2420bf48c15afd34481c68a2d4435/ijson-3.5.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2adeecd45830bfd5580ca79a584154713aabef0b9607e16249133df5d2859813", size = 130249, upload-time = "2026-02-24T03:56:46.333Z" }, + { url = "https://files.pythonhosted.org/packages/0b/94/3a3d623ca80768e834be8a834ef05960e3b9e79af1a911704ff10c9e8792/ijson-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d873e72889e7fc5962ab58909f1adff338d7c2f49e450e5b5fe844eff8155a14", size = 133501, upload-time = 
"2026-02-24T03:56:47.54Z" }, + { url = "https://files.pythonhosted.org/packages/cf/f6/df2c14ad340834eccee379046f155e4b66a16ddafd445429dee7b3323614/ijson-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9a88c559456a79708592234d697645d92b599718f4cbbeaa6515f83ac63ca0ae", size = 128438, upload-time = "2026-02-24T03:56:48.455Z" }, + { url = "https://files.pythonhosted.org/packages/0c/7e/9ff5b8b5fee113f5607bc4149b707382a898eeb545153189b075e5ec8d59/ijson-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cf83f58ad50dc0d39a2105cb26d4f359b38f42cef68b913170d4d47d97d97ba5", size = 131116, upload-time = "2026-02-24T03:56:49.737Z" }, + { url = "https://files.pythonhosted.org/packages/64/20/954ce0d440d7cf72a3d8361b14406f9cdbf624b1625c10f8488857c769d6/ijson-3.5.0-cp310-cp310-win32.whl", hash = "sha256:aec4580a7712a19b1f95cd41bed260fc6a31266d37ef941827772a4c199e8143", size = 52724, upload-time = "2026-02-24T03:56:50.932Z" }, + { url = "https://files.pythonhosted.org/packages/24/33/ece87d60502c6115642cbabeb8c122fa982212b392bc4f4ff5aab8e02dac/ijson-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a9c4c70501e23e8eb1675330686d1598eebfa14b6f0dbc8f00c2e081cc628fa", size = 55125, upload-time = "2026-02-24T03:56:51.942Z" }, + { url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" }, + { url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" }, + { url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" }, + { url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" }, + { url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" }, + { url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" }, + { url = "https://files.pythonhosted.org/packages/ee/a8/bbc21f9400ebdbca48fab272593e0d1f875691be1e927d264d90d48b8c47/ijson-3.5.0-cp311-cp311-win32.whl", hash = "sha256:966039cf9047c7967febf7b9a52ec6f38f5464a4c7fbb5565e0224b7376fefff", size = 52721, upload-time = "2026-02-24T03:57:04.365Z" }, + { url = "https://files.pythonhosted.org/packages/0d/2e/4e8c0208b8f920ee80c88c956f93e78318f2cfb646455353b182738b490c/ijson-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:6bad6a1634cb7c9f3f4c7e52325283b35b565f5b6cc27d42660c6912ce883422", size = 55121, upload-time = "2026-02-24T03:57:05.498Z" }, + { url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" }, + { url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" }, + { url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" }, + { url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" }, + { url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/71/a7254a065933c0e2ffd3586f46187d84830d3d7b6f41cfa5901820a4f87d/ijson-3.5.0-cp312-cp312-win32.whl", hash = "sha256:6673de9395fb9893c1c79a43becd8c8fbee0a250be6ea324bfd1487bb5e9ee4c", size = 53079, upload-time = "2026-02-24T03:57:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7b/2edca79b359fc9f95d774616867a03ecccdf333797baf5b3eea79733918c/ijson-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:f4f7fabd653459dcb004175235f310435959b1bb5dfa8878578391c6cc9ad944", size = 55500, upload-time = "2026-02-24T03:57:20.428Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" }, + { url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" }, + { url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" }, + { url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d1/3578df8e75d446aab0ae92e27f641341f586b85e1988536adebc65300cb4/ijson-3.5.0-cp313-cp313-win32.whl", hash = "sha256:8d073d9b13574cfa11083cc7267c238b7a6ed563c2661e79192da4a25f09c82c", size = 53065, upload-time = "2026-02-24T03:57:31.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/a2/f7cdaf5896710da3e69e982e44f015a83d168aa0f3a89b6f074b5426779d/ijson-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:2419f9e32e0968a876b04d8f26aeac042abd16f582810b576936bbc4c6015069", size = 55499, upload-time = "2026-02-24T03:57:32.773Z" }, + { url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" }, + { url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" }, + { url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" }, + { url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" }, + { url = "https://files.pythonhosted.org/packages/0e/7c/faf643733e3ab677f180018f6a855c4ef70b7c46540987424c563c959e42/ijson-3.5.0-cp313-cp313t-win32.whl", hash = "sha256:59d3f9f46deed1332ad669518b8099920512a78bda64c1f021fcd2aff2b36693", size = 55282, upload-time = "2026-02-24T03:57:44.353Z" }, + { url = "https://files.pythonhosted.org/packages/69/22/94ddb47c24b491377aca06cd8fc9202cad6ab50619842457d2beefde21ea/ijson-3.5.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c2839fa233746d8aad3b8cd2354e441613f5df66d721d59da4a09394bd1db2b", size = 58016, upload-time = "2026-02-24T03:57:45.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" }, + { url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" }, + { url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" }, + { url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" }, + { url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" }, + { url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" }, + { url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" }, + { url = "https://files.pythonhosted.org/packages/e4/51/67f4d80cd58ad7eab0cd1af5fe28b961886338956b2f88c0979e21914346/ijson-3.5.0-cp314-cp314-win32.whl", hash = "sha256:63bc8121bb422f6969ced270173a3fa692c29d4ae30c860a2309941abd81012a", size = 53610, upload-time = "2026-02-24T03:57:55.655Z" }, + { url = "https://files.pythonhosted.org/packages/70/d3/263672ea22983ba3940f1534316dbc9200952c1c2a2332d7a664e4eaa7ae/ijson-3.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:01b6dad72b7b7df225ef970d334556dfad46c696a2c6767fb5d9ed8889728bca", size = 56301, upload-time = "2026-02-24T03:57:56.584Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" }, + { url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" }, + { url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" }, + { url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" }, + { url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = 
"sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" }, + { url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" }, + { url = "https://files.pythonhosted.org/packages/7c/11/778201eb2e202ddd76b36b0fb29bf3d8e3c167389d8aa883c62524e49f47/ijson-3.5.0-cp314-cp314t-win32.whl", hash = "sha256:a2619460d6795b70d0155e5bf016200ac8a63ab5397aa33588bb02b6c21759e6", size = 56280, upload-time = "2026-02-24T03:58:10.116Z" }, + { url = "https://files.pythonhosted.org/packages/23/28/96711503245339084c8086b892c47415895eba49782d6cc52d9f4ee50301/ijson-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4f24b78d4ef028d17eb57ad1b16c0aed4a17bdd9badbf232dc5d9305b7e13854", size = 58965, upload-time = "2026-02-24T03:58:11.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" }, + { url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" }, + { url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = 
"2026-02-24T03:58:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" }, + { url = "https://files.pythonhosted.org/packages/c1/0f/e849d072f2e0afe49627de3995fc9dae54b4c804c70c0840f928d95c10e1/ijson-3.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fdeee6957f92e0c114f65c55cf8fe7eabb80cfacab64eea6864060913173f66d", size = 55369, upload-time = "2026-02-24T03:58:29.839Z" }, ] [[package]] @@ -2963,7 +2963,8 @@ version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "espeakng-loader" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "onnxruntime" }, { name = "phonemizer-fork" }, ] @@ -3002,7 +3003,8 @@ dependencies = [ { name = "langchain" }, { name = "langchain-core" }, { name = "langsmith" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pydantic-settings" }, { name 
= "pyyaml" }, { name = "requests" }, @@ -3061,7 +3063,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.7.5" +version = "0.7.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -3074,9 +3076,9 @@ dependencies = [ { name = "xxhash" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e9/55/a3641cae990c842d3f4c52e5308b391267c98ce531a7a586dfedf1a78c42/langsmith-0.7.5.tar.gz", hash = "sha256:e3bfc2d7ff0a6f9a719125e1e136b5f4fa11828a2be8979f47ee1a4c0510030e", size = 1038926, upload-time = "2026-02-19T20:47:51.144Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/01/c26b1d3a68764acd050cbb98f3ca922a25b3e4ece5768ee868f56206b4d4/langsmith-0.7.9.tar.gz", hash = "sha256:c6dfcc4cb8fea249714ac60a1963faa84cc59ded9cd1882794ffce8a8d1d1588", size = 1136295, upload-time = "2026-02-27T22:37:59.309Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/0e/65b3fab6db843150ed38f226b39213565c644f0aaa515e0168bb1eaee5ae/langsmith-0.7.5-py3-none-any.whl", hash = "sha256:c120c43c98af5f5af8877341f8256aba1a170a292645b31572f06b0cf703c683", size = 324337, upload-time = "2026-02-19T20:47:47.537Z" }, + { url = "https://files.pythonhosted.org/packages/b6/c9/2d5e5f654f97a4d38a0ff1b3004751c2cd81ceca05d603174e49f942b196/langsmith-0.7.9-py3-none-any.whl", hash = "sha256:e73478f4c4ae9b7407e0fcdced181f9f8b0e024c62a1552dbf0667ef6b19e82d", size = 344099, upload-time = "2026-02-27T22:37:57.497Z" }, ] [[package]] @@ -3094,7 +3096,8 @@ version = "1.0.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "protobuf" }, { name = "types-protobuf" }, ] @@ -3138,30 
+3141,30 @@ wheels = [ [[package]] name = "llvmlite" -version = "0.44.0" +version = "0.46.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/cd/08ae687ba099c7e3d21fe2ea536500563ef1943c5105bf6ab4ee3829f68e/llvmlite-0.46.0.tar.gz", hash = "sha256:227c9fd6d09dce2783c18b754b7cd9d9b3b3515210c46acc2d3c5badd9870ceb", size = 193456, upload-time = "2025-12-08T18:15:36.295Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/75/d4863ddfd8ab5f6e70f4504cf8cc37f4e986ec6910f4ef8502bb7d3c1c71/llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614", size = 28132306, upload-time = "2025-01-20T11:12:18.634Z" }, - { url = "https://files.pythonhosted.org/packages/37/d9/6e8943e1515d2f1003e8278819ec03e4e653e2eeb71e4d00de6cfe59424e/llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791", size = 26201096, upload-time = "2025-01-20T11:12:24.544Z" }, - { url = "https://files.pythonhosted.org/packages/aa/46/8ffbc114def88cc698906bf5acab54ca9fdf9214fe04aed0e71731fb3688/llvmlite-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8", size = 42361859, upload-time = "2025-01-20T11:12:31.839Z" }, - { url = "https://files.pythonhosted.org/packages/30/1c/9366b29ab050a726af13ebaae8d0dff00c3c58562261c79c635ad4f5eb71/llvmlite-0.44.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408", size = 41184199, 
upload-time = "2025-01-20T11:12:40.049Z" }, - { url = "https://files.pythonhosted.org/packages/69/07/35e7c594b021ecb1938540f5bce543ddd8713cff97f71d81f021221edc1b/llvmlite-0.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2", size = 30332381, upload-time = "2025-01-20T11:12:47.054Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e2/86b245397052386595ad726f9742e5223d7aea999b18c518a50e96c3aca4/llvmlite-0.44.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3", size = 28132305, upload-time = "2025-01-20T11:12:53.936Z" }, - { url = "https://files.pythonhosted.org/packages/ff/ec/506902dc6870249fbe2466d9cf66d531265d0f3a1157213c8f986250c033/llvmlite-0.44.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427", size = 26201090, upload-time = "2025-01-20T11:12:59.847Z" }, - { url = "https://files.pythonhosted.org/packages/99/fe/d030f1849ebb1f394bb3f7adad5e729b634fb100515594aca25c354ffc62/llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1", size = 42361858, upload-time = "2025-01-20T11:13:07.623Z" }, - { url = "https://files.pythonhosted.org/packages/d7/7a/ce6174664b9077fc673d172e4c888cb0b128e707e306bc33fff8c2035f0d/llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610", size = 41184200, upload-time = "2025-01-20T11:13:20.058Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c6/258801143975a6d09a373f2641237992496e15567b907a4d401839d671b8/llvmlite-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955", size = 30331193, upload-time = "2025-01-20T11:13:26.976Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297, upload-time = "2025-01-20T11:13:32.57Z" }, - { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105, upload-time = "2025-01-20T11:13:38.744Z" }, - { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" }, - { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" }, - { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" }, - { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306, upload-time = "2025-01-20T11:14:09.035Z" }, - { url = 
"https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090, upload-time = "2025-01-20T11:14:15.401Z" }, - { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904, upload-time = "2025-01-20T11:14:22.949Z" }, - { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245, upload-time = "2025-01-20T11:14:31.731Z" }, - { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193, upload-time = "2025-01-20T11:14:38.578Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a4/3959e1c61c5ca9db7921e5fd115b344c29b9d57a5dadd87bef97963ca1a5/llvmlite-0.46.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4323177e936d61ae0f73e653e2e614284d97d14d5dd12579adc92b6c2b0597b0", size = 37232766, upload-time = "2025-12-08T18:14:34.765Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a5/a4d916f1015106e1da876028606a8e87fd5d5c840f98c87bc2d5153b6a2f/llvmlite-0.46.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a2d461cb89537b7c20feb04c46c32e12d5ad4f0896c9dfc0f60336219ff248e", size = 56275176, upload-time = "2025-12-08T18:14:37.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/7f/a7f2028805dac8c1a6fae7bda4e739b7ebbcd45b29e15bf6d21556fcd3d5/llvmlite-0.46.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b1f6595a35b7b39c3518b85a28bf18f45e075264e4b2dce3f0c2a4f232b4a910", size = 55128629, upload-time = "2025-12-08T18:14:41.674Z" }, + { url = "https://files.pythonhosted.org/packages/b2/bc/4689e1ba0c073c196b594471eb21be0aa51d9e64b911728aa13cd85ef0ae/llvmlite-0.46.0-cp310-cp310-win_amd64.whl", hash = "sha256:e7a34d4aa6f9a97ee006b504be6d2b8cb7f755b80ab2f344dda1ef992f828559", size = 38138651, upload-time = "2025-12-08T18:14:45.845Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a1/2ad4b2367915faeebe8447f0a057861f646dbf5fbbb3561db42c65659cf3/llvmlite-0.46.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82f3d39b16f19aa1a56d5fe625883a6ab600d5cc9ea8906cca70ce94cabba067", size = 37232766, upload-time = "2025-12-08T18:14:48.836Z" }, + { url = "https://files.pythonhosted.org/packages/12/b5/99cf8772fdd846c07da4fd70f07812a3c8fd17ea2409522c946bb0f2b277/llvmlite-0.46.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a3df43900119803bbc52720e758c76f316a9a0f34612a886862dfe0a5591a17e", size = 56275175, upload-time = "2025-12-08T18:14:51.604Z" }, + { url = "https://files.pythonhosted.org/packages/38/f2/ed806f9c003563732da156139c45d970ee435bd0bfa5ed8de87ba972b452/llvmlite-0.46.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de183fefc8022d21b0aa37fc3e90410bc3524aed8617f0ff76732fc6c3af5361", size = 55128630, upload-time = "2025-12-08T18:14:55.107Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/8f5a37a65fc9b7b17408508145edd5f86263ad69c19d3574e818f533a0eb/llvmlite-0.46.0-cp311-cp311-win_amd64.whl", hash = "sha256:e8b10bc585c58bdffec9e0c309bb7d51be1f2f15e169a4b4d42f2389e431eb93", size = 38138652, upload-time = "2025-12-08T18:14:58.171Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/f8/4db016a5e547d4e054ff2f3b99203d63a497465f81ab78ec8eb2ff7b2304/llvmlite-0.46.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b9588ad4c63b4f0175a3984b85494f0c927c6b001e3a246a3a7fb3920d9a137", size = 37232767, upload-time = "2025-12-08T18:15:00.737Z" }, + { url = "https://files.pythonhosted.org/packages/aa/85/4890a7c14b4fa54400945cb52ac3cd88545bbdb973c440f98ca41591cdc5/llvmlite-0.46.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3535bd2bb6a2d7ae4012681ac228e5132cdb75fefb1bcb24e33f2f3e0c865ed4", size = 56275176, upload-time = "2025-12-08T18:15:03.936Z" }, + { url = "https://files.pythonhosted.org/packages/6a/07/3d31d39c1a1a08cd5337e78299fca77e6aebc07c059fbd0033e3edfab45c/llvmlite-0.46.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cbfd366e60ff87ea6cc62f50bc4cd800ebb13ed4c149466f50cf2163a473d1e", size = 55128630, upload-time = "2025-12-08T18:15:07.196Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6b/d139535d7590a1bba1ceb68751bef22fadaa5b815bbdf0e858e3875726b2/llvmlite-0.46.0-cp312-cp312-win_amd64.whl", hash = "sha256:398b39db462c39563a97b912d4f2866cd37cba60537975a09679b28fbbc0fb38", size = 38138940, upload-time = "2025-12-08T18:15:10.162Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ff/3eba7eb0aed4b6fca37125387cd417e8c458e750621fce56d2c541f67fa8/llvmlite-0.46.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:30b60892d034bc560e0ec6654737aaa74e5ca327bd8114d82136aa071d611172", size = 37232767, upload-time = "2025-12-08T18:15:13.22Z" }, + { url = "https://files.pythonhosted.org/packages/0e/54/737755c0a91558364b9200702c3c9c15d70ed63f9b98a2c32f1c2aa1f3ba/llvmlite-0.46.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6cc19b051753368a9c9f31dc041299059ee91aceec81bd57b0e385e5d5bf1a54", size = 56275176, upload-time = "2025-12-08T18:15:16.339Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/91/14f32e1d70905c1c0aa4e6609ab5d705c3183116ca02ac6df2091868413a/llvmlite-0.46.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bca185892908f9ede48c0acd547fe4dc1bafefb8a4967d47db6cf664f9332d12", size = 55128629, upload-time = "2025-12-08T18:15:19.493Z" }, + { url = "https://files.pythonhosted.org/packages/4a/a7/d526ae86708cea531935ae777b6dbcabe7db52718e6401e0fb9c5edea80e/llvmlite-0.46.0-cp313-cp313-win_amd64.whl", hash = "sha256:67438fd30e12349ebb054d86a5a1a57fd5e87d264d2451bcfafbbbaa25b82a35", size = 38138941, upload-time = "2025-12-08T18:15:22.536Z" }, + { url = "https://files.pythonhosted.org/packages/95/ae/af0ffb724814cc2ea64445acad05f71cff5f799bb7efb22e47ee99340dbc/llvmlite-0.46.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:d252edfb9f4ac1fcf20652258e3f102b26b03eef738dc8a6ffdab7d7d341d547", size = 37232768, upload-time = "2025-12-08T18:15:25.055Z" }, + { url = "https://files.pythonhosted.org/packages/c9/19/5018e5352019be753b7b07f7759cdabb69ca5779fea2494be8839270df4c/llvmlite-0.46.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:379fdd1c59badeff8982cb47e4694a6143bec3bb49aa10a466e095410522064d", size = 56275173, upload-time = "2025-12-08T18:15:28.109Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c9/d57877759d707e84c082163c543853245f91b70c804115a5010532890f18/llvmlite-0.46.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2e8cbfff7f6db0fa2c771ad24154e2a7e457c2444d7673e6de06b8b698c3b269", size = 55128628, upload-time = "2025-12-08T18:15:31.098Z" }, + { url = "https://files.pythonhosted.org/packages/30/a8/e61a8c2b3cc7a597073d9cde1fcbb567e9d827f1db30c93cf80422eac70d/llvmlite-0.46.0-cp314-cp314-win_amd64.whl", hash = "sha256:7821eda3ec1f18050f981819756631d60b6d7ab1a6cf806d9efefbe3f4082d61", size = 39153056, upload-time = "2025-12-08T18:15:33.938Z" }, ] [[package]] @@ -3305,7 +3308,8 @@ dependencies = [ { name = 
"cycler" }, { name = "fonttools" }, { name = "kiwisolver" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "pillow" }, { name = "pyparsing" }, @@ -3429,47 +3433,47 @@ wheels = [ [[package]] name = "mlx" -version = "0.30.6" +version = "0.31.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mlx-metal", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/2e/016527cf1012a68bb25f1ba3a73914f87807a7fee58d7a54fa69adcd2f55/mlx-0.30.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6c4df52aebfac40563259c04fca4a0c4d05b2061e09cdaad24e4233baa560b4f", size = 573214, upload-time = "2026-02-06T03:45:00.344Z" }, - { url = "https://files.pythonhosted.org/packages/a4/8f/600c6bed6eb6574e4a9d15e7a20a2ec903c2c5b54e2fd782c592a00ff933/mlx-0.30.6-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:0df8715b5cb84b6b6314aa868302873a0a94e63e6d195bc9858b8c58c79aa5a4", size = 573213, upload-time = "2026-02-06T03:45:02.208Z" }, - { url = "https://files.pythonhosted.org/packages/11/f7/d15af26c639c3d6000b6478fc0d54a7a528d71e79255190a0abc42f31608/mlx-0.30.6-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:7b4742ec2b748d2406c884e364fcd6f89d7f2b3f834f7b65c4c07acfa139cae8", size = 573254, upload-time = "2026-02-06T03:45:03.575Z" }, - { url = "https://files.pythonhosted.org/packages/d4/c3/e4f1fda18068fe0d5213f67d94771f39e219a24072746a02ca70a3a6020f/mlx-0.30.6-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:45c91ff34690b0d34063d1dc68a7a87f142ff9c5df6e5c611884a6bdcc9a53e1", size = 636558, upload-time = "2026-02-06T03:45:05.262Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/c7/201e9e3ab3304aca99f850a0c1bc5d52e52e48960b0d415a196cd288faef/mlx-0.30.6-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:b9b746fa0a44dfe1576925eb343ee9afa7023d3d805f84a3d90d0066096f31b8", size = 669479, upload-time = "2026-02-06T03:45:07.122Z" }, - { url = "https://files.pythonhosted.org/packages/93/81/21d745beeda53ee29e9c027d806f1e1cac983e8ddb3d6b18d44a1b30a11b/mlx-0.30.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:e721d29c4250ada3cba7a5ad43d358b42401600e792c378ed6b52c9d692aaba8", size = 573359, upload-time = "2026-02-06T03:45:08.41Z" }, - { url = "https://files.pythonhosted.org/packages/05/08/826286458df5ea91efc380d71fd8058ee7338207c6b547204f2758e168d8/mlx-0.30.6-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:23f55c1c160a38ab350f4f7ce3ab10c490df39800ad35c4821c3ef5fa89ec24e", size = 573359, upload-time = "2026-02-06T03:45:09.688Z" }, - { url = "https://files.pythonhosted.org/packages/56/aa/3fc9ac795934182e680a0cbeb99202838e4548139cfd580015dcfbfb7ee8/mlx-0.30.6-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:37c37571f8c1567c2b7e4871237b92a2b321fb8157d6426373be946c03e49ebd", size = 573406, upload-time = "2026-02-06T03:45:11.383Z" }, - { url = "https://files.pythonhosted.org/packages/af/d1/b8bcc332e3c268bf59632d7a8f1b5c8e6a4b154d651aa20b93e359e3c004/mlx-0.30.6-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:253317a2bab3a1927d7cb89267690d82525acb5810f30d696ff9b705e7f8a78a", size = 636997, upload-time = "2026-02-06T03:45:12.619Z" }, - { url = "https://files.pythonhosted.org/packages/89/fa/bdc4b8aa6d078e724decb754b0f04ac1a25e46c190e52639906401c3b8b8/mlx-0.30.6-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:4e2058ac219d99d38baa90f810947c6bfa09a28511dfe660629012a7c470c35d", size = 669638, upload-time = "2026-02-06T03:45:14.103Z" }, - { url = 
"https://files.pythonhosted.org/packages/85/fe/85acff870a9949494fd505b22c34d63eb127442f5f8751a159d3a78f7ef6/mlx-0.30.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47d20016cb5733d06c1d017412a31983dbe3237cf70942760430188922ffc1ba", size = 573484, upload-time = "2026-02-06T03:45:15.88Z" }, - { url = "https://files.pythonhosted.org/packages/e1/14/5546082ee37118b33afb6300d8e07d03efea2dbba838d514d9465f87489b/mlx-0.30.6-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:6b8c133df2d6a2ed173d2b7bb50d7032a13be84e1792b7d79171ad8f50a8c0ea", size = 573486, upload-time = "2026-02-06T03:45:17.506Z" }, - { url = "https://files.pythonhosted.org/packages/ef/b5/ae04666a7b8bda74e2c6903756710103e283ea6fa4edd2c92449ad4547d6/mlx-0.30.6-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:31eabb5d1da4ac7b16f2042fdb046b993cdf0f32bc3312e0af469232bb67720b", size = 573509, upload-time = "2026-02-06T03:45:18.68Z" }, - { url = "https://files.pythonhosted.org/packages/9f/8e/fdee70051e2c7f523f9b22575f05bdb1b47300aba1ecda15bda98a9b01c1/mlx-0.30.6-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:070010932d424005e6c9c76b379ccdf4d96b385658fdb34dc780fa4eb24cb1a0", size = 622061, upload-time = "2026-02-06T03:45:19.984Z" }, - { url = "https://files.pythonhosted.org/packages/65/dd/fe29f1e19e5268a8f892c83be35f14e63f1aea3baf7e7e44e246d4fea184/mlx-0.30.6-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:9084c8f20544ec6a53aa3edcd2da85d205e07ff80bd47151633219bd5cfcd23c", size = 663715, upload-time = "2026-02-06T03:45:21.873Z" }, - { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" }, - { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", 
hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" }, - { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" }, - { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" }, - { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" }, - { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" }, - { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" }, + { url = "https://files.pythonhosted.org/packages/73/54/269d13847b04b07523d44cf903e1d3c6d48f56e6e89dda7e16418b411629/mlx-0.31.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:38680838e0dd9a621ed4adc5a9ed8b94aeb6a4798142fbe215b821b8c6b8fc36", size = 575395, upload-time = "2026-02-27T23:49:11.886Z" }, + { url = "https://files.pythonhosted.org/packages/3d/86/1fbe1f8f3a23c92c821c235ab7a28395c86c900b0a2b2425f3c8862bbeb6/mlx-0.31.0-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:7aded590bcf6839307c3acc899e196936991f97b499ddbdd0cd3b228bf10792f", size = 575394, upload-time = "2026-02-27T23:49:13.738Z" }, + { url = "https://files.pythonhosted.org/packages/20/01/02b79132e91182c779bb6c4f586c5fb86d49c32e8f07f307d2d4ca64cca6/mlx-0.31.0-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:6e3ae83607b798b44cb3e44437095cfd26886fecc15f90f29f9eafd206d4d170", size = 575411, upload-time = "2026-02-27T23:49:15.374Z" }, + { url = "https://files.pythonhosted.org/packages/13/86/c501ddb496a185b69f3181d77276907f43a847eaa4d9fff86bc0616d1dcc/mlx-0.31.0-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:b25f785c94eb47d8104604a5de0e7d749b801e7a40073cbf457aa94c372e5593", size = 639542, upload-time = "2026-02-27T23:49:16.822Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/7c/508bfc140cf777dbe61fc2be0fbfca56e3f0ceed233cd7a8ef4add84262e/mlx-0.31.0-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:6a4342027e6608ce69807a8f079c750a7c6161f543ebb49e55654edd03c178d6", size = 672721, upload-time = "2026-02-27T23:49:17.978Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d3/fcb8b9f645ae70b3295a353999c3c6c7a66fd43ed8aa716b13da12bf40d4/mlx-0.31.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:285313eaeba425e58cbb3238c2d1a3894e6252d58f243ce56681d5419a568d6c", size = 575602, upload-time = "2026-02-27T23:49:19.314Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2a/d35072e8dc31d9550f8218cfc388c1cd12c7fd89e8246540a9c7b873d958/mlx-0.31.0-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:acf4f04ff33a80784a0f15c492166dc889e65659b41c410ca5a7c2d78bee2a3a", size = 575603, upload-time = "2026-02-27T23:49:20.651Z" }, + { url = "https://files.pythonhosted.org/packages/43/fa/eca64a514cd50a4a38cc9b8827db85d9e554c3fe407ede043d061055b1ab/mlx-0.31.0-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:f624571e23a86654496c42a507b4bb42ded0edb91f33161fabafdbf6b81ba024", size = 575637, upload-time = "2026-02-27T23:49:22.02Z" }, + { url = "https://files.pythonhosted.org/packages/72/cd/0ee01b646010c7a22872d2b849b766941f813c4fd777602306d01af3915f/mlx-0.31.0-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:5b5306a0934b15c4e3a1088a10066bdde3966c21b95006c63ecc38ca8e3891e0", size = 639267, upload-time = "2026-02-27T23:49:23.265Z" }, + { url = "https://files.pythonhosted.org/packages/73/50/c72e2cabdeefc2bf51ae5c1111bdaa9055a0c2d18bc87314ef965ffff422/mlx-0.31.0-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:18078bc67dfb7ed602fca233d00ce93e23d590d9347da5009472455a92831066", size = 672858, upload-time = "2026-02-27T23:49:24.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/7d/87fb0daa006dbbbd8894c3d496c7d9dfc52e4ade260482276d3eca137a15/mlx-0.31.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:de6c0a3e8aa0e7d1365d46634fdbb3f835c164fbdb6ba8a239e039a4efa07fe2", size = 575834, upload-time = "2026-02-27T23:49:26.61Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e3/aa0fac5a9d52b1a4686c7097e56775c1a96dee3084f9c587b74e4c2cd284/mlx-0.31.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:d6af01b15177da995336a6fd9878e7c5994720a9f1614d8f4d1dbe9293167c30", size = 575836, upload-time = "2026-02-27T23:49:28.505Z" }, + { url = "https://files.pythonhosted.org/packages/8d/15/6aa3edaa34aeef370634756b7d131b8dc1cdb0002ddecdd3d876b5f9fa0c/mlx-0.31.0-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:1ad14ddc3a15818f5bba0de35e88559ed8dcb93ccff2ef879ff604d02d663b25", size = 575828, upload-time = "2026-02-27T23:49:29.684Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d3/53ac650a569f5f5111c0280611acf0dcbdfa5fd0da2d433bad0f5575de73/mlx-0.31.0-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:a80754ecf64191f71da1946dc5de6cf903344cc90dd286c589792ee9d3fc62f9", size = 624405, upload-time = "2026-02-27T23:49:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/a0c0b73c04f7673a50c505e155dd0088cc7a116d7b8d4eb4d1d9fdcd2c8f/mlx-0.31.0-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:363282eb094785f6aba27810ff89331c0f7829c6961f571cd0feaad09d2c809f", size = 666952, upload-time = "2026-02-27T23:49:33.262Z" }, + { url = "https://files.pythonhosted.org/packages/4a/09/35d1192cf1f655438213d8baa2264a8bc2426b44d93802dabfc177fd8e81/mlx-0.31.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4f33e9aafc6d3ad29e72743dfb786c4ce67397414f0a091469058626381fc1bc", size = 575815, upload-time = "2026-02-27T23:49:34.607Z" }, + { url = "https://files.pythonhosted.org/packages/59/9d/29e0cb154a31ed05c9d24c776513bf1ec506b8570e214b4563b55bb19ef6/mlx-0.31.0-cp313-cp313-macosx_15_0_arm64.whl", 
hash = "sha256:242806b8ad6a4d3ce86cdff513f86520552de7592786712770b2e1ebd178816a", size = 575821, upload-time = "2026-02-27T23:49:35.947Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6c/437aefdca17216aab02d0fb7528cd63e2c3d8d9c1b079c07d579a770645f/mlx-0.31.0-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:7f0bdbac084017820ce513a12318771a06c7ec10fad159839e27c998bc5dad89", size = 575810, upload-time = "2026-02-27T23:49:37.165Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d5/986777b53e2c3eff709ee5a275b41ed84a9c04f60071e97f9d3b60dec845/mlx-0.31.0-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:8642dda2b23195d9921973749ae9bf764e2c7d70bfc0e60b23b6335e660cc610", size = 624713, upload-time = "2026-02-27T23:49:38.672Z" }, + { url = "https://files.pythonhosted.org/packages/2d/29/da0875739d08760461a5b21207c34d959bc7572b27e46ccc0f48badae078/mlx-0.31.0-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:c6daa671cfa3c194951d742aa09030c5008d9d9657034b2903389fa090b3ba92", size = 666888, upload-time = "2026-02-27T23:49:40.222Z" }, + { url = "https://files.pythonhosted.org/packages/66/60/0152a44ed737c3b16e9044909d01212b99e216c6ab4b2f76faa054ae8172/mlx-0.31.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:cce3e15cf11c608c9e721502fe56e54f9f48b897e9b80f1204a48643d68710c0", size = 577579, upload-time = "2026-02-27T23:49:41.723Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6b/70f0a254d7ace58a030547a99219f1342c3cf383029e1af90eee3efaeb85/mlx-0.31.0-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ba330fe40d73b202880bbb5cac62de0b639cf4c44a12853bcadb34a9e3ffe880", size = 577582, upload-time = "2026-02-27T23:49:42.998Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/81cf057dbc005a43d27b7dfaff88198c61bbfe76cb8da3499821083c3fca/mlx-0.31.0-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:d2014d113070846c6cdee980653f561c92a4a663a449f64e70c15bbf74d637e1", size = 577535, upload-time = "2026-02-27T23:49:44.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/22/1b2bddb2774c7951aa620d286157439f288186215ff6ce18d9a9a45e608e/mlx-0.31.0-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:994fab25ff521621e03001177a8f0f1a7bf8294ff340f89910ec074f9f681ed9", size = 627410, upload-time = "2026-02-27T23:49:45.654Z" }, + { url = "https://files.pythonhosted.org/packages/46/f4/e9256326912ac21a9853b3a9856da19292b908270ff96cb27abb8421c8c6/mlx-0.31.0-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:c3bb9961f40d098659326b0edb96e2a16adecfaf3c1f2518cad5a0b7e55a3a5d", size = 667351, upload-time = "2026-02-27T23:49:46.868Z" }, ] [[package]] name = "mlx-metal" -version = "0.30.6" +version = "0.31.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" }, - { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" }, - { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" }, + { url = "https://files.pythonhosted.org/packages/94/4f/0a0671dfa62b59bf429edab0e2c9c7f9bc77865aa4218cd46f2f41d7d11a/mlx_metal-0.31.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:1c572a6e3634a63060c103b0c38ac309e2d217be15519e3d8f0d6b452bb015f5", size = 38596752, upload-time = "2026-02-27T23:29:39.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/42/c6d7bfd097b777f932d6cf8c79e41b565070b63cc452a069b8804e505140/mlx_metal-0.31.0-py3-none-macosx_15_0_arm64.whl", hash = "sha256:554dc7cb29e0ea5fb6941df42f11a1de385b095848e6183c7a99d7c1f1a11f5d", size = 38595434, upload-time = "2026-02-27T23:29:43.285Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8f/cdaffd759b4c71e74c294e773daacad8aafabac103b93e0aa56d4468d279/mlx_metal-0.31.0-py3-none-macosx_26_0_arm64.whl", hash = "sha256:7fd412f55ddf9f1d90c2cd86ce281d19e8eb93d093c6dbd784a49f8bd7d0a22c", size = 47879607, upload-time = "2026-02-27T23:29:46.571Z" }, ] [[package]] @@ -3481,9 +3485,10 @@ dependencies = [ { name = "mlx" }, { name = "more-itertools" }, { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tiktoken" }, { name = "torch" }, { name = "tqdm" }, @@ -3715,9 +3720,10 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, { name = "matplotlib" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, 
marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] sdist = { url = "https://files.pythonhosted.org/packages/11/08/539e3cff148b7f9bde5b4b060451a7445d708fa3fe5d8a2bc0c552976e52/noisereduce-3.0.3.tar.gz", hash = "sha256:ff64a28fb92e3c81f153cf29550e5c2db56b2523afa8f56f5e03c177cc5e918f", size = 20968, upload-time = "2024-10-06T13:43:45.431Z" } @@ -3727,40 +3733,44 @@ wheels = [ [[package]] name = "numba" -version = "0.61.2" +version = "0.64.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/23/c9/a0fb41787d01d621046138da30f6c2100d80857bf34b3390dd68040f27a3/numba-0.64.0.tar.gz", hash = "sha256:95e7300af648baa3308127b1955b52ce6d11889d16e8cfe637b4f85d2fca52b1", size = 2765679, upload-time = "2026-02-18T18:41:20.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/ca/f470be59552ccbf9531d2d383b67ae0b9b524d435fb4a0d229fef135116e/numba-0.61.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:cf9f9fc00d6eca0c23fc840817ce9f439b9f03c8f03d6246c0e7f0cb15b7162a", size = 2775663, upload-time = 
"2025-04-09T02:57:34.143Z" }, - { url = "https://files.pythonhosted.org/packages/f5/13/3bdf52609c80d460a3b4acfb9fdb3817e392875c0d6270cf3fd9546f138b/numba-0.61.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ea0247617edcb5dd61f6106a56255baab031acc4257bddaeddb3a1003b4ca3fd", size = 2778344, upload-time = "2025-04-09T02:57:36.609Z" }, - { url = "https://files.pythonhosted.org/packages/e2/7d/bfb2805bcfbd479f04f835241ecf28519f6e3609912e3a985aed45e21370/numba-0.61.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8c7a522c26215d5f62ebec436e3d341f7f590079245a2f1008dfd498cc1642", size = 3824054, upload-time = "2025-04-09T02:57:38.162Z" }, - { url = "https://files.pythonhosted.org/packages/e3/27/797b2004745c92955470c73c82f0e300cf033c791f45bdecb4b33b12bdea/numba-0.61.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd1e74609855aa43661edffca37346e4e8462f6903889917e9f41db40907daa2", size = 3518531, upload-time = "2025-04-09T02:57:39.709Z" }, - { url = "https://files.pythonhosted.org/packages/b1/c6/c2fb11e50482cb310afae87a997707f6c7d8a48967b9696271347441f650/numba-0.61.2-cp310-cp310-win_amd64.whl", hash = "sha256:ae45830b129c6137294093b269ef0a22998ccc27bf7cf096ab8dcf7bca8946f9", size = 2831612, upload-time = "2025-04-09T02:57:41.559Z" }, - { url = "https://files.pythonhosted.org/packages/3f/97/c99d1056aed767503c228f7099dc11c402906b42a4757fec2819329abb98/numba-0.61.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:efd3db391df53aaa5cfbee189b6c910a5b471488749fd6606c3f33fc984c2ae2", size = 2775825, upload-time = "2025-04-09T02:57:43.442Z" }, - { url = "https://files.pythonhosted.org/packages/95/9e/63c549f37136e892f006260c3e2613d09d5120672378191f2dc387ba65a2/numba-0.61.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:49c980e4171948ffebf6b9a2520ea81feed113c1f4890747ba7f59e74be84b1b", size = 2778695, upload-time = "2025-04-09T02:57:44.968Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/c8/8740616c8436c86c1b9a62e72cb891177d2c34c2d24ddcde4c390371bf4c/numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60", size = 3829227, upload-time = "2025-04-09T02:57:46.63Z" }, - { url = "https://files.pythonhosted.org/packages/fc/06/66e99ae06507c31d15ff3ecd1f108f2f59e18b6e08662cd5f8a5853fbd18/numba-0.61.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18", size = 3523422, upload-time = "2025-04-09T02:57:48.222Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a4/2b309a6a9f6d4d8cfba583401c7c2f9ff887adb5d54d8e2e130274c0973f/numba-0.61.2-cp311-cp311-win_amd64.whl", hash = "sha256:76bcec9f46259cedf888041b9886e257ae101c6268261b19fda8cfbc52bec9d1", size = 2831505, upload-time = "2025-04-09T02:57:50.108Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626, upload-time = "2025-04-09T02:57:51.857Z" }, - { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287, upload-time = "2025-04-09T02:57:53.658Z" }, - { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" }, - { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929, upload-time = "2025-04-09T02:57:58.45Z" }, - { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785, upload-time = "2025-04-09T02:57:59.96Z" }, - { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289, upload-time = "2025-04-09T02:58:01.435Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918, upload-time = "2025-04-09T02:58:02.933Z" }, - { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056, upload-time = "2025-04-09T02:58:04.538Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846, upload-time = "2025-04-09T02:58:06.125Z" }, + { url = "https://files.pythonhosted.org/packages/4c/5e/604fed821cd7e3426bb3bc99a7ed6ac0bcb489f4cd93052256437d082f95/numba-0.64.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc09b79440952e3098eeebea4bf6e8d2355fb7f12734fcd9fc5039f0dca90727", size = 2683250, upload-time = "2026-02-18T18:40:45.829Z" }, + { url = "https://files.pythonhosted.org/packages/4f/9f/9275a723d050b5f1a9b1c7fb7dbfce324fef301a8e50c5f88338569db06c/numba-0.64.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1afe3a80b8c2f376b211fb7a49e536ef9eafc92436afc95a2f41ea5392f8cc65", size = 3742168, upload-time = "2026-02-18T18:40:48.066Z" }, + { url = "https://files.pythonhosted.org/packages/e2/d1/97ca7dddaa36b16f4c46319bdb6b4913ba15d0245317d0d8ccde7b2d7d92/numba-0.64.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23804194b93b8cd416c6444b5fbc4956082a45fed2d25436ef49c594666e7f7e", size = 3449103, upload-time = "2026-02-18T18:40:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/52/0a/b9e137ad78415373e3353564500e8bf29dbce3c0d73633bb384d4e5d7537/numba-0.64.0-cp310-cp310-win_amd64.whl", hash = "sha256:e2a9fe998bb2cf848960b34db02c2c3b5e02cf82c07a26d9eef3494069740278", size = 2749950, upload-time = "2026-02-18T18:40:51.536Z" }, + { url = "https://files.pythonhosted.org/packages/89/a3/1a4286a1c16136c8896d8e2090d950e79b3ec626d3a8dc9620f6234d5a38/numba-0.64.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:766156ee4b8afeeb2b2e23c81307c5d19031f18d5ce76ae2c5fb1429e72fa92b", size = 2682938, upload-time = "2026-02-18T18:40:52.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/16/aa6e3ba3cd45435c117d1101b278b646444ed05b7c712af631b91353f573/numba-0.64.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d17071b4ffc9d39b75d8e6c101a36f0c81b646123859898c9799cb31807c8f78", size = 3747376, upload-time = "2026-02-18T18:40:54.925Z" }, + { url = "https://files.pythonhosted.org/packages/c0/f1/dd2f25e18d75fdf897f730b78c5a7b00cc4450f2405564dbebfaf359f21f/numba-0.64.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ead5630434133bac87fa67526eacb264535e4e9a2d5ec780e0b4fc381a7d275", size = 3453292, upload-time = "2026-02-18T18:40:56.818Z" }, + { url = "https://files.pythonhosted.org/packages/31/29/e09d5630578a50a2b3fa154990b6b839cf95327aa0709e2d50d0b6816cd1/numba-0.64.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2b1fd93e7aaac07d6fbaed059c00679f591f2423885c206d8c1b55d65ca3f2d", size = 2749824, upload-time = "2026-02-18T18:40:58.392Z" }, + { url = "https://files.pythonhosted.org/packages/70/a6/9fc52cb4f0d5e6d8b5f4d81615bc01012e3cf24e1052a60f17a68deb8092/numba-0.64.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:69440a8e8bc1a81028446f06b363e28635aa67bd51b1e498023f03b812e0ce68", size = 2683418, upload-time = "2026-02-18T18:40:59.886Z" }, + { url = "https://files.pythonhosted.org/packages/9b/89/1a74ea99b180b7a5587b0301ed1b183a2937c4b4b67f7994689b5d36fc34/numba-0.64.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13721011f693ba558b8dd4e4db7f2640462bba1b855bdc804be45bbeb55031a", size = 3804087, upload-time = "2026-02-18T18:41:01.699Z" }, + { url = "https://files.pythonhosted.org/packages/91/e1/583c647404b15f807410510fec1eb9b80cb8474165940b7749f026f21cbc/numba-0.64.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0b180b1133f2b5d8b3f09d96b6d7a9e51a7da5dda3c09e998b5bcfac85d222c", size = 3504309, upload-time = "2026-02-18T18:41:03.252Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/23/0fce5789b8a5035e7ace21216a468143f3144e02013252116616c58339aa/numba-0.64.0-cp312-cp312-win_amd64.whl", hash = "sha256:e63dc94023b47894849b8b106db28ccb98b49d5498b98878fac1a38f83ac007a", size = 2752740, upload-time = "2026-02-18T18:41:05.097Z" }, + { url = "https://files.pythonhosted.org/packages/52/80/2734de90f9300a6e2503b35ee50d9599926b90cbb7ac54f9e40074cd07f1/numba-0.64.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3bab2c872194dcd985f1153b70782ec0fbbe348fffef340264eacd3a76d59fd6", size = 2683392, upload-time = "2026-02-18T18:41:06.563Z" }, + { url = "https://files.pythonhosted.org/packages/42/e8/14b5853ebefd5b37723ef365c5318a30ce0702d39057eaa8d7d76392859d/numba-0.64.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:703a246c60832cad231d2e73c1182f25bf3cc8b699759ec8fe58a2dbc689a70c", size = 3812245, upload-time = "2026-02-18T18:41:07.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a2/f60dc6c96d19b7185144265a5fbf01c14993d37ff4cd324b09d0212aa7ce/numba-0.64.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e2e49a7900ee971d32af7609adc0cfe6aa7477c6f6cccdf6d8138538cf7756f", size = 3511328, upload-time = "2026-02-18T18:41:09.504Z" }, + { url = "https://files.pythonhosted.org/packages/9c/2a/fe7003ea7e7237ee7014f8eaeeb7b0d228a2db22572ca85bab2648cf52cb/numba-0.64.0-cp313-cp313-win_amd64.whl", hash = "sha256:396f43c3f77e78d7ec84cdfc6b04969c78f8f169351b3c4db814b97e7acf4245", size = 2752668, upload-time = "2026-02-18T18:41:11.455Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8a/77d26afe0988c592dd97cb8d4e80bfb3dfc7dbdacfca7d74a7c5c81dd8c2/numba-0.64.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f565d55eaeff382cbc86c63c8c610347453af3d1e7afb2b6569aac1c9b5c93ce", size = 2683590, upload-time = "2026-02-18T18:41:12.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/4b/600b8b7cdbc7f9cebee9ea3d13bb70052a79baf28944024ffcb59f0712e3/numba-0.64.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b55169b18892c783f85e9ad9e6f5297a6d12967e4414e6b71361086025ff0bb", size = 3781163, upload-time = "2026-02-18T18:41:15.377Z" }, + { url = "https://files.pythonhosted.org/packages/ff/73/53f2d32bfa45b7175e9944f6b816d8c32840178c3eee9325033db5bf838e/numba-0.64.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:196bcafa02c9dd1707e068434f6d5cedde0feb787e3432f7f1f0e993cc336c4c", size = 3481172, upload-time = "2026-02-18T18:41:17.281Z" }, + { url = "https://files.pythonhosted.org/packages/b5/00/aebd2f7f1e11e38814bb96e95a27580817a7b340608d3ac085fdbab83174/numba-0.64.0-cp314-cp314-win_amd64.whl", hash = "sha256:213e9acbe7f1c05090592e79020315c1749dd52517b90e94c517dca3f014d4a1", size = 2754700, upload-time = "2026-02-18T18:41:19.277Z" }, ] [[package]] name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, @@ -3819,6 +3829,91 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, ] +[[package]] +name = "numpy" +version = "2.4.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/44/71852273146957899753e69986246d6a176061ea183407e95418c2aa4d9a/numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", size = 16955478, upload-time = "2026-01-31T23:10:25.623Z" }, + { url = "https://files.pythonhosted.org/packages/74/41/5d17d4058bd0cd96bcbd4d9ff0fb2e21f52702aab9a72e4a594efa18692f/numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", size = 14965467, upload-time = "2026-01-31T23:10:28.186Z" }, + { url = "https://files.pythonhosted.org/packages/49/48/fb1ce8136c19452ed15f033f8aee91d5defe515094e330ce368a0647846f/numpy-2.4.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", size = 5475172, upload-time = "2026-01-31T23:10:30.848Z" }, + { url = "https://files.pythonhosted.org/packages/40/a9/3feb49f17bbd1300dd2570432961f5c8a4ffeff1db6f02c7273bd020a4c9/numpy-2.4.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", size = 6805145, upload-time = "2026-01-31T23:10:32.352Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/39/fdf35cbd6d6e2fcad42fcf85ac04a85a0d0fbfbf34b30721c98d602fd70a/numpy-2.4.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", size = 15966084, upload-time = "2026-01-31T23:10:34.502Z" }, + { url = "https://files.pythonhosted.org/packages/1b/46/6fa4ea94f1ddf969b2ee941290cca6f1bfac92b53c76ae5f44afe17ceb69/numpy-2.4.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", size = 16899477, upload-time = "2026-01-31T23:10:37.075Z" }, + { url = "https://files.pythonhosted.org/packages/09/a1/2a424e162b1a14a5bd860a464ab4e07513916a64ab1683fae262f735ccd2/numpy-2.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", size = 17323429, upload-time = "2026-01-31T23:10:39.704Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a2/73014149ff250628df72c58204822ac01d768697913881aacf839ff78680/numpy-2.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", size = 18635109, upload-time = "2026-01-31T23:10:41.924Z" }, + { url = "https://files.pythonhosted.org/packages/6c/0c/73e8be2f1accd56df74abc1c5e18527822067dced5ec0861b5bb882c2ce0/numpy-2.4.2-cp311-cp311-win32.whl", hash = "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", size = 6237915, upload-time = "2026-01-31T23:10:45.26Z" }, + { url = "https://files.pythonhosted.org/packages/76/ae/e0265e0163cf127c24c3969d29f1c4c64551a1e375d95a13d32eab25d364/numpy-2.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", size = 12607972, upload-time = "2026-01-31T23:10:47.021Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/a5/c43029af9b8014d6ea157f192652c50042e8911f4300f8f6ed3336bf437f/numpy-2.4.2-cp311-cp311-win_arm64.whl", hash = "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", size = 10485763, upload-time = "2026-01-31T23:10:50.087Z" }, + { url = "https://files.pythonhosted.org/packages/51/6e/6f394c9c77668153e14d4da83bcc247beb5952f6ead7699a1a2992613bea/numpy-2.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", size = 16667963, upload-time = "2026-01-31T23:10:52.147Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/55483431f2b2fd015ae6ed4fe62288823ce908437ed49db5a03d15151678/numpy-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", size = 14693571, upload-time = "2026-01-31T23:10:54.789Z" }, + { url = "https://files.pythonhosted.org/packages/2f/20/18026832b1845cdc82248208dd929ca14c9d8f2bac391f67440707fff27c/numpy-2.4.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", size = 5203469, upload-time = "2026-01-31T23:10:57.343Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/2eb97c8a77daaba34eaa3fa7241a14ac5f51c46a6bd5911361b644c4a1e2/numpy-2.4.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", size = 6550820, upload-time = "2026-01-31T23:10:59.429Z" }, + { url = "https://files.pythonhosted.org/packages/b1/91/b97fdfd12dc75b02c44e26c6638241cc004d4079a0321a69c62f51470c4c/numpy-2.4.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", size = 15663067, upload-time = "2026-01-31T23:11:01.291Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/c6/a18e59f3f0b8071cc85cbc8d80cd02d68aa9710170b2553a117203d46936/numpy-2.4.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", size = 16619782, upload-time = "2026-01-31T23:11:03.669Z" }, + { url = "https://files.pythonhosted.org/packages/b7/83/9751502164601a79e18847309f5ceec0b1446d7b6aa12305759b72cf98b2/numpy-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", size = 17013128, upload-time = "2026-01-31T23:11:05.913Z" }, + { url = "https://files.pythonhosted.org/packages/61/c4/c4066322256ec740acc1c8923a10047818691d2f8aec254798f3dd90f5f2/numpy-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", size = 18345324, upload-time = "2026-01-31T23:11:08.248Z" }, + { url = "https://files.pythonhosted.org/packages/ab/af/6157aa6da728fa4525a755bfad486ae7e3f76d4c1864138003eb84328497/numpy-2.4.2-cp312-cp312-win32.whl", hash = "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", size = 5960282, upload-time = "2026-01-31T23:11:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/92/0f/7ceaaeaacb40567071e94dbf2c9480c0ae453d5bb4f52bea3892c39dc83c/numpy-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", size = 12314210, upload-time = "2026-01-31T23:11:12.176Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a3/56c5c604fae6dd40fa2ed3040d005fca97e91bd320d232ac9931d77ba13c/numpy-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", size = 10220171, upload-time = "2026-01-31T23:11:14.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" }, + { url = "https://files.pythonhosted.org/packages/09/f0/817d03a03f93ba9c6c8993de509277d84e69f9453601915e4a69554102a1/numpy-2.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", size = 14688322, upload-time = "2026-01-31T23:11:19.883Z" }, + { url = "https://files.pythonhosted.org/packages/da/b4/f805ab79293c728b9a99438775ce51885fd4f31b76178767cfc718701a39/numpy-2.4.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", size = 5198157, upload-time = "2026-01-31T23:11:22.375Z" }, + { url = "https://files.pythonhosted.org/packages/74/09/826e4289844eccdcd64aac27d13b0fd3f32039915dd5b9ba01baae1f436c/numpy-2.4.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", size = 6546330, upload-time = "2026-01-31T23:11:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/19/fb/cbfdbfa3057a10aea5422c558ac57538e6acc87ec1669e666d32ac198da7/numpy-2.4.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", size = 15660968, upload-time = "2026-01-31T23:11:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/04/dc/46066ce18d01645541f0186877377b9371b8fa8017fa8262002b4ef22612/numpy-2.4.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", size = 16607311, upload-time = "2026-01-31T23:11:28.117Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/d9/4b5adfc39a43fa6bf918c6d544bc60c05236cc2f6339847fc5b35e6cb5b0/numpy-2.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", size = 17012850, upload-time = "2026-01-31T23:11:30.888Z" }, + { url = "https://files.pythonhosted.org/packages/b7/20/adb6e6adde6d0130046e6fdfb7675cc62bc2f6b7b02239a09eb58435753d/numpy-2.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", size = 18334210, upload-time = "2026-01-31T23:11:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/78/0e/0a73b3dff26803a8c02baa76398015ea2a5434d9b8265a7898a6028c1591/numpy-2.4.2-cp313-cp313-win32.whl", hash = "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", size = 5958199, upload-time = "2026-01-31T23:11:35.385Z" }, + { url = "https://files.pythonhosted.org/packages/43/bc/6352f343522fcb2c04dbaf94cb30cca6fd32c1a750c06ad6231b4293708c/numpy-2.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", size = 12310848, upload-time = "2026-01-31T23:11:38.001Z" }, + { url = "https://files.pythonhosted.org/packages/6e/8d/6da186483e308da5da1cc6918ce913dcfe14ffde98e710bfeff2a6158d4e/numpy-2.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", size = 10221082, upload-time = "2026-01-31T23:11:40.392Z" }, + { url = "https://files.pythonhosted.org/packages/25/a1/9510aa43555b44781968935c7548a8926274f815de42ad3997e9e83680dd/numpy-2.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", size = 14815866, upload-time = "2026-01-31T23:11:42.495Z" }, + { url = "https://files.pythonhosted.org/packages/36/30/6bbb5e76631a5ae46e7923dd16ca9d3f1c93cfa8d4ed79a129814a9d8db3/numpy-2.4.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", size = 5325631, upload-time = "2026-01-31T23:11:44.7Z" }, + { url = "https://files.pythonhosted.org/packages/46/00/3a490938800c1923b567b3a15cd17896e68052e2145d8662aaf3e1ffc58f/numpy-2.4.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", size = 6646254, upload-time = "2026-01-31T23:11:46.341Z" }, + { url = "https://files.pythonhosted.org/packages/d3/e9/fac0890149898a9b609caa5af7455a948b544746e4b8fe7c212c8edd71f8/numpy-2.4.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", size = 15720138, upload-time = "2026-01-31T23:11:48.082Z" }, + { url = "https://files.pythonhosted.org/packages/ea/5c/08887c54e68e1e28df53709f1893ce92932cc6f01f7c3d4dc952f61ffd4e/numpy-2.4.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", size = 16655398, upload-time = "2026-01-31T23:11:50.293Z" }, + { url = "https://files.pythonhosted.org/packages/4d/89/253db0fa0e66e9129c745e4ef25631dc37d5f1314dad2b53e907b8538e6d/numpy-2.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", size = 17079064, upload-time = "2026-01-31T23:11:52.927Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d5/cbade46ce97c59c6c3da525e8d95b7abe8a42974a1dc5c1d489c10433e88/numpy-2.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", size = 18379680, upload-time = "2026-01-31T23:11:55.22Z" }, + { url = "https://files.pythonhosted.org/packages/40/62/48f99ae172a4b63d981babe683685030e8a3df4f246c893ea5c6ef99f018/numpy-2.4.2-cp313-cp313t-win32.whl", hash = "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", size = 6082433, 
upload-time = "2026-01-31T23:11:58.096Z" }, + { url = "https://files.pythonhosted.org/packages/07/38/e054a61cfe48ad9f1ed0d188e78b7e26859d0b60ef21cd9de4897cdb5326/numpy-2.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", size = 12451181, upload-time = "2026-01-31T23:11:59.782Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a4/a05c3a6418575e185dd84d0b9680b6bb2e2dc3e4202f036b7b4e22d6e9dc/numpy-2.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1", size = 10290756, upload-time = "2026-01-31T23:12:02.438Z" }, + { url = "https://files.pythonhosted.org/packages/18/88/b7df6050bf18fdcfb7046286c6535cabbdd2064a3440fca3f069d319c16e/numpy-2.4.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", size = 16663092, upload-time = "2026-01-31T23:12:04.521Z" }, + { url = "https://files.pythonhosted.org/packages/25/7a/1fee4329abc705a469a4afe6e69b1ef7e915117747886327104a8493a955/numpy-2.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", size = 14698770, upload-time = "2026-01-31T23:12:06.96Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0b/f9e49ba6c923678ad5bc38181c08ac5e53b7a5754dbca8e581aa1a56b1ff/numpy-2.4.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", size = 5208562, upload-time = "2026-01-31T23:12:09.632Z" }, + { url = "https://files.pythonhosted.org/packages/7d/12/d7de8f6f53f9bb76997e5e4c069eda2051e3fe134e9181671c4391677bb2/numpy-2.4.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", size = 6543710, upload-time = "2026-01-31T23:12:11.969Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/63/c66418c2e0268a31a4cf8a8b512685748200f8e8e8ec6c507ce14e773529/numpy-2.4.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", size = 15677205, upload-time = "2026-01-31T23:12:14.33Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6c/7f237821c9642fb2a04d2f1e88b4295677144ca93285fd76eff3bcba858d/numpy-2.4.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", size = 16611738, upload-time = "2026-01-31T23:12:16.525Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a7/39c4cdda9f019b609b5c473899d87abff092fc908cfe4d1ecb2fcff453b0/numpy-2.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", size = 17028888, upload-time = "2026-01-31T23:12:19.306Z" }, + { url = "https://files.pythonhosted.org/packages/da/b3/e84bb64bdfea967cc10950d71090ec2d84b49bc691df0025dddb7c26e8e3/numpy-2.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", size = 18339556, upload-time = "2026-01-31T23:12:21.816Z" }, + { url = "https://files.pythonhosted.org/packages/88/f5/954a291bc1192a27081706862ac62bb5920fbecfbaa302f64682aa90beed/numpy-2.4.2-cp314-cp314-win32.whl", hash = "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", size = 6006899, upload-time = "2026-01-31T23:12:24.14Z" }, + { url = "https://files.pythonhosted.org/packages/05/cb/eff72a91b2efdd1bc98b3b8759f6a1654aa87612fc86e3d87d6fe4f948c4/numpy-2.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", size = 12443072, upload-time = "2026-01-31T23:12:26.33Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/75/62726948db36a56428fce4ba80a115716dc4fad6a3a4352487f8bb950966/numpy-2.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", size = 10494886, upload-time = "2026-01-31T23:12:28.488Z" }, + { url = "https://files.pythonhosted.org/packages/36/2f/ee93744f1e0661dc267e4b21940870cabfae187c092e1433b77b09b50ac4/numpy-2.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", size = 14818567, upload-time = "2026-01-31T23:12:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/6535212add7d76ff938d8bdc654f53f88d35cddedf807a599e180dcb8e66/numpy-2.4.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", size = 5328372, upload-time = "2026-01-31T23:12:32.962Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9d/c48f0a035725f925634bf6b8994253b43f2047f6778a54147d7e213bc5a7/numpy-2.4.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", size = 6649306, upload-time = "2026-01-31T23:12:34.797Z" }, + { url = "https://files.pythonhosted.org/packages/81/05/7c73a9574cd4a53a25907bad38b59ac83919c0ddc8234ec157f344d57d9a/numpy-2.4.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", size = 15722394, upload-time = "2026-01-31T23:12:36.565Z" }, + { url = "https://files.pythonhosted.org/packages/35/fa/4de10089f21fc7d18442c4a767ab156b25c2a6eaf187c0db6d9ecdaeb43f/numpy-2.4.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", size = 16653343, upload-time = "2026-01-31T23:12:39.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/f9/d33e4ffc857f3763a57aa85650f2e82486832d7492280ac21ba9efda80da/numpy-2.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", size = 17078045, upload-time = "2026-01-31T23:12:42.041Z" }, + { url = "https://files.pythonhosted.org/packages/c8/b8/54bdb43b6225badbea6389fa038c4ef868c44f5890f95dd530a218706da3/numpy-2.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", size = 18380024, upload-time = "2026-01-31T23:12:44.331Z" }, + { url = "https://files.pythonhosted.org/packages/a5/55/6e1a61ded7af8df04016d81b5b02daa59f2ea9252ee0397cb9f631efe9e5/numpy-2.4.2-cp314-cp314t-win32.whl", hash = "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", size = 6153937, upload-time = "2026-01-31T23:12:47.229Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/fa6118d1ed6d776b0983f3ceac9b1a5558e80df9365b1c3aa6d42bf9eee4/numpy-2.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", size = 12631844, upload-time = "2026-01-31T23:12:48.997Z" }, + { url = "https://files.pythonhosted.org/packages/32/0a/2ec5deea6dcd158f254a7b372fb09cfba5719419c8d66343bab35237b3fb/numpy-2.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", size = 10565379, upload-time = "2026-01-31T23:12:51.345Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/50e14d36d915ef64d8f8bc4a087fc8264d82c785eda6711f80ab7e620335/numpy-2.4.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", size = 16833179, upload-time = "2026-01-31T23:12:53.5Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/17/809b5cad63812058a8189e91a1e2d55a5a18fd04611dbad244e8aeae465c/numpy-2.4.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", size = 14889755, upload-time = "2026-01-31T23:12:55.933Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ea/181b9bcf7627fc8371720316c24db888dcb9829b1c0270abf3d288b2e29b/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", size = 5399500, upload-time = "2026-01-31T23:12:58.671Z" }, + { url = "https://files.pythonhosted.org/packages/33/9f/413adf3fc955541ff5536b78fcf0754680b3c6d95103230252a2c9408d23/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", size = 6714252, upload-time = "2026-01-31T23:13:00.518Z" }, + { url = "https://files.pythonhosted.org/packages/91/da/643aad274e29ccbdf42ecd94dafe524b81c87bcb56b83872d54827f10543/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", size = 15797142, upload-time = "2026-01-31T23:13:02.219Z" }, + { url = "https://files.pythonhosted.org/packages/66/27/965b8525e9cb5dc16481b30a1b3c21e50c7ebf6e9dbd48d0c4d0d5089c7e/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", size = 16727979, upload-time = "2026-01-31T23:13:04.62Z" }, + { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", size = 12502577, upload-time = "2026-01-31T23:13:07.08Z" }, +] + [[package]] name = "nvidia-cublas-cu12" version = "12.8.4.1" @@ 
-3974,7 +4069,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coloredlogs" }, { name = "flatbuffers" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "protobuf" }, { name = "sympy" }, @@ -4028,7 +4124,8 @@ name = "opencv-python" version = "4.13.0.92" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/fc/6f/5a28fef4c4a382be06afe3938c64cc168223016fa520c5abaf37e8862aa5/opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19", size = 46247052, upload-time = "2026-02-05T07:01:25.046Z" }, @@ -4397,7 +4494,8 @@ dependencies = [ { name = "markdown" }, { name = "nltk" }, { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "onnxruntime" }, { name = "openai" }, { name = "pillow" }, @@ -4642,7 +4740,7 @@ docs = [ { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-autodoc-typehints", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.11'" }, { name = "sphinx-autodoc-typehints", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, - { name = "sphinx-autodoc-typehints", version = "3.6.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-autodoc-typehints", version = "3.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-markdown-builder" }, { name = "sphinx-rtd-theme" }, { name = "toml" }, @@ -4691,7 +4789,7 @@ requires-dist = [ { name = "mlx-whisper", marker = "extra == 'mlx-whisper'", specifier = "~=0.4.2" }, { name = "nltk", specifier = ">=3.9.3,<4" }, { name = "noisereduce", marker = "extra == 'noisereduce'", specifier = "~=3.0.3" }, - { name = "numba", specifier = "==0.61.2" }, + { name = "numba", specifier = ">=0.61.2" }, { name = "numpy", specifier = ">=1.26.4,<3" }, { name = "nvidia-riva-client", marker = "extra == 'nvidia'", specifier = "~=2.21.1" }, { name = "onnxruntime", specifier = "~=1.23.2" }, @@ -4791,7 +4889,8 @@ name = "pipecat-ai-krisp" version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1d/37/0f1d11d1dc33234a36de01992a9e5adc3c5e1dce71cc87b2bf909fa2f698/pipecat_ai_krisp-0.4.0.tar.gz", hash = "sha256:4f0e05e218dcf15874957e9851299e219c713a0aa8353d2fd811f1b54001a602", size = 13338, upload-time = "2025-06-09T16:13:08.209Z" } @@ -4855,7 +4954,7 @@ wheels = [ [[package]] name = "posthog" -version = "7.9.3" +version = "7.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { 
name = "backoff" }, @@ -4865,9 +4964,9 @@ dependencies = [ { name = "six" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7e/06/bcffcd262c861695fbaa74490b872e37d6fc41d3dcc1a43207d20525522f/posthog-7.9.3.tar.gz", hash = "sha256:55f7580265d290936ac4c112a4e2031a41743be4f90d4183ac9f85b721ff13ae", size = 172336, upload-time = "2026-02-18T22:20:24.085Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/50/5c0d9232118fdc1434c1b7bbc1a14de5b310498ede09a7e2123ae1f5f8bd/posthog-7.9.4.tar.gz", hash = "sha256:50acc94ef6267d7030575d2ff54e89e748fac2e98525ac672aeb0423160f77cf", size = 172973, upload-time = "2026-02-25T15:28:47.065Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/7e/0e06a96823fa7c11ce73920e6ff77e82445db62ac4eae0b6f211edb4c4c2/posthog-7.9.3-py3-none-any.whl", hash = "sha256:2ddcacdef6c4afb124ebfcf27d7be58388943a7e24f8d4a51a52732c9b90bad6", size = 197819, upload-time = "2026-02-18T22:20:22.015Z" }, + { url = "https://files.pythonhosted.org/packages/df/6f/794a4e94e3640282e75013ce18e65f0a01afc8d71f733664b4a272f98bce/posthog-7.9.4-py3-none-any.whl", hash = "sha256:414125ddd7a48b9c67feb24d723df1f666af41ad10f8a9a8bbaf5e3b536a2e26", size = 198651, upload-time = "2026-02-25T15:28:45.398Z" }, ] [[package]] @@ -5392,9 +5491,10 @@ version = "0.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "future" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = 
"scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/75/b5/39d59c44ecd828fabfdbd796b50a561e6543ca90ef440ab307374f107856/pyloudnorm-0.1.1.tar.gz", hash = "sha256:63cd4e197dea4e7795160ea08ed02d318091bce883e436a6dbc5963326b71e1e", size = 8588, upload-time = "2023-01-05T16:11:28.601Z" } wheels = [ @@ -5462,7 +5562,8 @@ dependencies = [ { name = "audiolab" }, { name = "click" }, { name = "matplotlib" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] wheels = [ @@ -5530,6 +5631,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-discovery" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/bb/93a3e83bdf9322c7e21cafd092e56a4a17c4d8ef4277b6eb01af1a540a6f/python_discovery-1.1.0.tar.gz", hash = "sha256:447941ba1aed8cc2ab7ee3cb91be5fc137c5bdbb05b7e6ea62fbdcb66e50b268", size = 55674, upload-time = "2026-02-26T09:42:49.668Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/54/82a6e2ef37f0f23dccac604b9585bdcbd0698604feb64807dcb72853693e/python_discovery-1.1.0-py3-none-any.whl", hash = "sha256:a162893b8809727f54594a99ad2179d2ede4bf953e12d4c7abc3cc9cdbd1437b", size = 30687, upload-time = "2026-02-26T09:42:48.548Z" }, +] + 
[[package]] name = "python-dotenv" version = "1.2.1" @@ -5680,20 +5794,21 @@ wheels = [ [[package]] name = "qdrant-client" -version = "1.16.1" +version = "1.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "grpcio" }, { name = "httpx", extra = ["http2"] }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "portalocker" }, { name = "protobuf" }, { name = "pydantic" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d9/68/fec3816a223c0b73b0e0036460be45c61ce2770ffb9197ac371e4f615ddc/qdrant_client-1.16.1.tar.gz", hash = "sha256:676c7c10fd4d4cb2981b8fcb32fd764f5f661b04b7334d024034d07212f971fd", size = 332130, upload-time = "2025-11-25T04:31:54.212Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/fb/c9c4cecf6e7fdff2dbaeee0de40e93fe495379eb5fe2775b184ea45315da/qdrant_client-1.17.0.tar.gz", hash = "sha256:47eb033edb9be33a4babb4d87b0d8d5eaf03d52112dca0218db7f2030bf41ba9", size = 344839, upload-time = "2026-02-19T16:03:17.069Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/e2/60a20d04b0595c641516463168909c5bbcc192d3d6eacb637c1677109c6a/qdrant_client-1.16.1-py3-none-any.whl", hash = "sha256:1eefe89f66e8a468ba0de1680e28b441e69825cfb62e8fb2e457c15e24ce5e3b", size = 378481, upload-time = "2025-11-25T04:31:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/c1/15/dfadbc9d8c9872e8ac45fa96f5099bb2855f23426bfea1bbcdc85e64ef6e/qdrant_client-1.17.0-py3-none-any.whl", hash = "sha256:f5b452c68c42b3580d3d266446fb00d3c6e3aae89c916e16585b3c704e108438", size = 390381, upload-time = "2026-02-19T16:03:15.486Z" }, ] [[package]] @@ -5725,123 +5840,123 @@ wheels = [ [[package]] name = "regex" -version = "2026.2.19" +version = "2026.2.28" 
source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/de/f10b4506acfd684de4e42b0aa56ccea1a778a18864da8f6d319a40591062/regex-2026.2.19-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f5a37a17d110f9d5357a43aa7e3507cb077bf3143d1c549a45c4649e90e40a70", size = 488369, upload-time = "2026-02-19T18:59:45.01Z" }, - { url = "https://files.pythonhosted.org/packages/8b/2f/b4eaef1f0b4d0bf2a73eaf07c08f6c13422918a4180c9211ce0521746d0c/regex-2026.2.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:676c4e6847a83a1d5732b4ed553881ad36f0a8133627bb695a89ecf3571499d3", size = 290743, upload-time = "2026-02-19T18:59:48.527Z" }, - { url = "https://files.pythonhosted.org/packages/76/7c/805413bd0a88d04688c0725c222cfb811bd54a2f571004c24199a1ae55d6/regex-2026.2.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82336faeecac33297cd42857c3b36f12b91810e3fdd276befdd128f73a2b43fa", size = 288652, upload-time = "2026-02-19T18:59:50.2Z" }, - { url = "https://files.pythonhosted.org/packages/08/ff/2c4cd530a878b1975398e76faef4285f11e7c9ccf1aaedfd528bfcc1f580/regex-2026.2.19-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:52136f5b71f095cb74b736cc3a1b578030dada2e361ef2f07ca582240b703946", size = 781759, upload-time = "2026-02-19T18:59:51.836Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/45/9608ab1b41f6740ff4076eabadde8e8b3f3400942b348ac41e8599ccc131/regex-2026.2.19-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4192464fe3e6cb0ef6751f7d3b16f886d8270d359ed1590dd555539d364f0ff7", size = 850947, upload-time = "2026-02-19T18:59:53.739Z" }, - { url = "https://files.pythonhosted.org/packages/90/3a/66471b6c4f7cac17e14bf5300e46661bba2b17ffb0871bd2759e837a6f82/regex-2026.2.19-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e561dd47a85d2660d3d3af4e6cb2da825cf20f121e577147963f875b83d32786", size = 898794, upload-time = "2026-02-19T18:59:55.993Z" }, - { url = "https://files.pythonhosted.org/packages/c2/d2/38c53929a5931f7398e5e49f5a5a3079cb2aba30119b4350608364cfad8c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00ec994d7824bf01cd6c7d14c7a6a04d9aeaf7c42a2bc22d2359d715634d539b", size = 791922, upload-time = "2026-02-19T18:59:58.216Z" }, - { url = "https://files.pythonhosted.org/packages/8b/bd/b046e065630fa25059d9c195b7b5308ea94da45eee65d40879772500f74c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2cb00aabd96b345d56a8c2bc328c8d6c4d29935061e05078bf1f02302e12abf5", size = 783345, upload-time = "2026-02-19T18:59:59.948Z" }, - { url = "https://files.pythonhosted.org/packages/d4/8f/045c643d2fa255a985e8f87d848e4be230b711a8935e4bdc58e60b8f7b84/regex-2026.2.19-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f374366ed35673ea81b86a8859c457d4fae6ba092b71024857e9e237410c7404", size = 768055, upload-time = "2026-02-19T19:00:01.65Z" }, - { url = "https://files.pythonhosted.org/packages/72/9f/ab7ae9f5447559562f1a788bbc85c0e526528c5e6c20542d18e4afc86aad/regex-2026.2.19-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f9417fd853fcd00b7d55167e692966dd12d95ba1a88bf08a62002ccd85030790", size 
= 774955, upload-time = "2026-02-19T19:00:03.368Z" }, - { url = "https://files.pythonhosted.org/packages/37/5c/f16fc23c56f60b6f4ff194604a6e53bb8aec7b6e8e4a23a482dee8d77235/regex-2026.2.19-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:12e86a01594031abf892686fcb309b041bf3de3d13d99eb7e2b02a8f3c687df1", size = 846010, upload-time = "2026-02-19T19:00:05.079Z" }, - { url = "https://files.pythonhosted.org/packages/51/c8/6be4c854135d7c9f35d4deeafdaf124b039ecb4ffcaeb7ed0495ad2c97ca/regex-2026.2.19-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:79014115e6fdf18fd9b32e291d58181bf42d4298642beaa13fd73e69810e4cb6", size = 755938, upload-time = "2026-02-19T19:00:07.148Z" }, - { url = "https://files.pythonhosted.org/packages/d6/8d/f683d49b9663a5324b95a328e69d397f6dade7cb84154eec116bf79fe150/regex-2026.2.19-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:31aefac2506967b7dd69af2c58eca3cc8b086d4110b66d6ac6e9026f0ee5b697", size = 835773, upload-time = "2026-02-19T19:00:08.939Z" }, - { url = "https://files.pythonhosted.org/packages/16/cd/619224b90da09f167fe4497c350a0d0b30edc539ee9244bf93e604c073c3/regex-2026.2.19-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:49cef7bb2a491f91a8869c7cdd90babf0a417047ab0bf923cd038ed2eab2ccb8", size = 780075, upload-time = "2026-02-19T19:00:10.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/88/19cfb0c262d6f9d722edef29157125418bf90eb3508186bf79335afeedae/regex-2026.2.19-cp310-cp310-win32.whl", hash = "sha256:3a039474986e7a314ace6efb9ce52f5da2bdb80ac4955358723d350ec85c32ad", size = 266004, upload-time = "2026-02-19T19:00:12.371Z" }, - { url = "https://files.pythonhosted.org/packages/82/af/5b487e0287ef72545d7ae92edecdacbe3d44e531cac24fda7de5598ba8dd/regex-2026.2.19-cp310-cp310-win_amd64.whl", hash = "sha256:5b81ff4f9cad99f90c807a00c5882fbcda86d8b3edd94e709fb531fc52cb3d25", size = 277895, upload-time = "2026-02-19T19:00:13.75Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/19/b6715a187ffca4d2979af92a46ce922445ba41f910bf187ccd666a2d52ef/regex-2026.2.19-cp310-cp310-win_arm64.whl", hash = "sha256:a032bc01a4bc73fc3cadba793fce28eb420da39338f47910c59ffcc11a5ba5ef", size = 270465, upload-time = "2026-02-19T19:00:15.127Z" }, - { url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" }, - { url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" }, - { url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" }, - { url = "https://files.pythonhosted.org/packages/e2/3e/6d7c24a2f423c03ad03e3fbddefa431057186ac1c4cb4fa98b03c7f39808/regex-2026.2.19-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db5fd91eec71e7b08de10011a2223d0faa20448d4e1380b9daa179fa7bf58906", size = 793785, upload-time = "2026-02-19T19:00:22.926Z" }, - { url = "https://files.pythonhosted.org/packages/67/32/fdb8107504b3122a79bde6705ac1f9d495ed1fe35b87d7cfc1864471999a/regex-2026.2.19-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdbade8acba71bb45057c2b72f477f0b527c4895f9c83e6cfc30d4a006c21726", size = 860731, upload-time = "2026-02-19T19:00:25.196Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/fd/cc8c6f05868defd840be6e75919b1c3f462357969ac2c2a0958363b4dc23/regex-2026.2.19-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:31a5f561eb111d6aae14202e7043fb0b406d3c8dddbbb9e60851725c9b38ab1d", size = 907350, upload-time = "2026-02-19T19:00:27.093Z" }, - { url = "https://files.pythonhosted.org/packages/b5/1b/4590db9caa8db3d5a3fe31197c4e42c15aab3643b549ef6a454525fa3a61/regex-2026.2.19-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4584a3ee5f257b71e4b693cc9be3a5104249399f4116fe518c3f79b0c6fc7083", size = 800628, upload-time = "2026-02-19T19:00:29.392Z" }, - { url = "https://files.pythonhosted.org/packages/76/05/513eaa5b96fa579fd0b813e19ec047baaaf573d7374ff010fa139b384bf7/regex-2026.2.19-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:196553ba2a2f47904e5dc272d948a746352e2644005627467e055be19d73b39e", size = 773711, upload-time = "2026-02-19T19:00:30.996Z" }, - { url = "https://files.pythonhosted.org/packages/95/65/5aed06d8c54563d37fea496cf888be504879a3981a7c8e12c24b2c92c209/regex-2026.2.19-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0c10869d18abb759a3317c757746cc913d6324ce128b8bcec99350df10419f18", size = 783186, upload-time = "2026-02-19T19:00:34.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/57/79a633ad90f2371b4ef9cd72ba3a69a1a67d0cfaab4fe6fa8586d46044ef/regex-2026.2.19-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e689fed279cbe797a6b570bd18ff535b284d057202692c73420cb93cca41aa32", size = 854854, upload-time = "2026-02-19T19:00:37.306Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2d/0f113d477d9e91ec4545ec36c82e58be25038d06788229c91ad52da2b7f5/regex-2026.2.19-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0782bd983f19ac7594039c9277cd6f75c89598c1d72f417e4d30d874105eb0c7", size = 762279, upload-time = "2026-02-19T19:00:39.793Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/cb/237e9fa4f61469fd4f037164dbe8e675a376c88cf73aaaa0aedfd305601c/regex-2026.2.19-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:dbb240c81cfed5d4a67cb86d7676d9f7ec9c3f186310bec37d8a1415210e111e", size = 846172, upload-time = "2026-02-19T19:00:42.134Z" }, - { url = "https://files.pythonhosted.org/packages/ac/7c/104779c5915cc4eb557a33590f8a3f68089269c64287dd769afd76c7ce61/regex-2026.2.19-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80d31c3f1fe7e4c6cd1831cd4478a0609903044dfcdc4660abfe6fb307add7f0", size = 789078, upload-time = "2026-02-19T19:00:43.908Z" }, - { url = "https://files.pythonhosted.org/packages/a8/4a/eae4e88b1317fb2ff57794915e0099198f51e760f6280b320adfa0ad396d/regex-2026.2.19-cp311-cp311-win32.whl", hash = "sha256:66e6a43225ff1064f8926adbafe0922b370d381c3330edaf9891cade52daa790", size = 266013, upload-time = "2026-02-19T19:00:47.274Z" }, - { url = "https://files.pythonhosted.org/packages/f9/29/ba89eb8fae79705e07ad1bd69e568f776159d2a8093c9dbc5303ee618298/regex-2026.2.19-cp311-cp311-win_amd64.whl", hash = "sha256:59a7a5216485a1896c5800e9feb8ff9213e11967b482633b6195d7da11450013", size = 277906, upload-time = "2026-02-19T19:00:49.011Z" }, - { url = "https://files.pythonhosted.org/packages/e3/1a/042d8f04b28e318df92df69d8becb0f42221eb3dd4fe5e976522f4337c76/regex-2026.2.19-cp311-cp311-win_arm64.whl", hash = "sha256:ec661807ffc14c8d14bb0b8c1bb3d5906e476bc96f98b565b709d03962ee4dd4", size = 270463, upload-time = "2026-02-19T19:00:50.988Z" }, - { url = "https://files.pythonhosted.org/packages/b3/73/13b39c7c9356f333e564ab4790b6cb0df125b8e64e8d6474e73da49b1955/regex-2026.2.19-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1665138776e4ac1aa75146669236f7a8a696433ec4e525abf092ca9189247cc", size = 489541, upload-time = "2026-02-19T19:00:52.728Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/77/fcc7bd9a67000d07fbcc11ed226077287a40d5c84544e62171d29d3ef59c/regex-2026.2.19-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d792b84709021945597e05656aac059526df4e0c9ef60a0eaebb306f8fafcaa8", size = 291414, upload-time = "2026-02-19T19:00:54.51Z" }, - { url = "https://files.pythonhosted.org/packages/f9/87/3997fc72dc59233426ef2e18dfdd105bb123812fff740ee9cc348f1a3243/regex-2026.2.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db970bcce4d63b37b3f9eb8c893f0db980bbf1d404a1d8d2b17aa8189de92c53", size = 289140, upload-time = "2026-02-19T19:00:56.841Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d0/b7dd3883ed1cff8ee0c0c9462d828aaf12be63bf5dc55453cbf423523b13/regex-2026.2.19-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03d706fbe7dfec503c8c3cb76f9352b3e3b53b623672aa49f18a251a6c71b8e6", size = 798767, upload-time = "2026-02-19T19:00:59.014Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7e/8e2d09103832891b2b735a2515abf377db21144c6dd5ede1fb03c619bf09/regex-2026.2.19-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dbff048c042beef60aa1848961384572c5afb9e8b290b0f1203a5c42cf5af65", size = 864436, upload-time = "2026-02-19T19:01:00.772Z" }, - { url = "https://files.pythonhosted.org/packages/8a/2e/afea8d23a6db1f67f45e3a0da3057104ce32e154f57dd0c8997274d45fcd/regex-2026.2.19-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccaaf9b907ea6b4223d5cbf5fa5dff5f33dc66f4907a25b967b8a81339a6e332", size = 912391, upload-time = "2026-02-19T19:01:02.865Z" }, - { url = "https://files.pythonhosted.org/packages/59/3c/ea5a4687adaba5e125b9bd6190153d0037325a0ba3757cc1537cc2c8dd90/regex-2026.2.19-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75472631eee7898e16a8a20998d15106cb31cfde21cdf96ab40b432a7082af06", size = 
803702, upload-time = "2026-02-19T19:01:05.298Z" }, - { url = "https://files.pythonhosted.org/packages/dc/c5/624a0705e8473a26488ec1a3a4e0b8763ecfc682a185c302dfec71daea35/regex-2026.2.19-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d89f85a5ccc0cec125c24be75610d433d65295827ebaf0d884cbe56df82d4774", size = 775980, upload-time = "2026-02-19T19:01:07.047Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4b/ed776642533232b5599b7c1f9d817fe11faf597e8a92b7a44b841daaae76/regex-2026.2.19-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0d9f81806abdca3234c3dd582b8a97492e93de3602c8772013cb4affa12d1668", size = 788122, upload-time = "2026-02-19T19:01:08.744Z" }, - { url = "https://files.pythonhosted.org/packages/8c/58/e93e093921d13b9784b4f69896b6e2a9e09580a265c59d9eb95e87d288f2/regex-2026.2.19-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9dadc10d1c2bbb1326e572a226d2ec56474ab8aab26fdb8cf19419b372c349a9", size = 858910, upload-time = "2026-02-19T19:01:10.488Z" }, - { url = "https://files.pythonhosted.org/packages/85/77/ff1d25a0c56cd546e0455cbc93235beb33474899690e6a361fa6b52d265b/regex-2026.2.19-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6bc25d7e15f80c9dc7853cbb490b91c1ec7310808b09d56bd278fe03d776f4f6", size = 764153, upload-time = "2026-02-19T19:01:12.156Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ef/8ec58df26d52d04443b1dc56f9be4b409f43ed5ae6c0248a287f52311fc4/regex-2026.2.19-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:965d59792f5037d9138da6fed50ba943162160443b43d4895b182551805aff9c", size = 850348, upload-time = "2026-02-19T19:01:14.147Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b3/c42fd5ed91639ce5a4225b9df909180fc95586db071f2bf7c68d2ccbfbe6/regex-2026.2.19-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:38d88c6ed4a09ed61403dbdf515d969ccba34669af3961ceb7311ecd0cef504a", size = 789977, upload-time = "2026-02-19T19:01:15.838Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/22/bc3b58ebddbfd6ca5633e71fd41829ee931963aad1ebeec55aad0c23044e/regex-2026.2.19-cp312-cp312-win32.whl", hash = "sha256:5df947cabab4b643d4791af5e28aecf6bf62e6160e525651a12eba3d03755e6b", size = 266381, upload-time = "2026-02-19T19:01:17.952Z" }, - { url = "https://files.pythonhosted.org/packages/fc/4a/6ff550b63e67603ee60e69dc6bd2d5694e85046a558f663b2434bdaeb285/regex-2026.2.19-cp312-cp312-win_amd64.whl", hash = "sha256:4146dc576ea99634ae9c15587d0c43273b4023a10702998edf0fa68ccb60237a", size = 277274, upload-time = "2026-02-19T19:01:19.826Z" }, - { url = "https://files.pythonhosted.org/packages/cc/29/9ec48b679b1e87e7bc8517dff45351eab38f74fbbda1fbcf0e9e6d4e8174/regex-2026.2.19-cp312-cp312-win_arm64.whl", hash = "sha256:cdc0a80f679353bd68450d2a42996090c30b2e15ca90ded6156c31f1a3b63f3b", size = 270509, upload-time = "2026-02-19T19:01:22.075Z" }, - { url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879", size = 489329, upload-time = "2026-02-19T19:01:23.841Z" }, - { url = "https://files.pythonhosted.org/packages/da/aa/78ff4666d3855490bae87845a5983485e765e1f970da20adffa2937b241d/regex-2026.2.19-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3aa0944f1dc6e92f91f3b306ba7f851e1009398c84bfd370633182ee4fc26a64", size = 291308, upload-time = "2026-02-19T19:01:25.605Z" }, - { url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968", size = 289033, upload-time = "2026-02-19T19:01:27.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/ec/6438a9344d2869cf5265236a06af1ca6d885e5848b6561e10629bc8e5a11/regex-2026.2.19-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d0e72703c60d68b18b27cde7cdb65ed2570ae29fb37231aa3076bfb6b1d1c13", size = 798798, upload-time = "2026-02-19T19:01:28.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/be/b1ce2d395e3fd2ce5f2fde2522f76cade4297cfe84cd61990ff48308749c/regex-2026.2.19-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:46e69a4bf552e30e74a8aa73f473c87efcb7f6e8c8ece60d9fd7bf13d5c86f02", size = 864444, upload-time = "2026-02-19T19:01:30.933Z" }, - { url = "https://files.pythonhosted.org/packages/d5/97/a3406460c504f7136f140d9461960c25f058b0240e4424d6fb73c7a067ab/regex-2026.2.19-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8edda06079bd770f7f0cf7f3bba1a0b447b96b4a543c91fe0c142d034c166161", size = 912633, upload-time = "2026-02-19T19:01:32.744Z" }, - { url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7", size = 803718, upload-time = "2026-02-19T19:01:34.61Z" }, - { url = "https://files.pythonhosted.org/packages/2f/e5/61d80132690a1ef8dc48e0f44248036877aebf94235d43f63a20d1598888/regex-2026.2.19-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bcf57d30659996ee5c7937999874504c11b5a068edc9515e6a59221cc2744dd1", size = 775975, upload-time = "2026-02-19T19:01:36.525Z" }, - { url = "https://files.pythonhosted.org/packages/05/32/ae828b3b312c972cf228b634447de27237d593d61505e6ad84723f8eabba/regex-2026.2.19-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:8e6e77cd92216eb489e21e5652a11b186afe9bdefca8a2db739fd6b205a9e0a4", size = 788129, upload-time = "2026-02-19T19:01:38.498Z" }, - { url = "https://files.pythonhosted.org/packages/cb/25/d74f34676f22bec401eddf0e5e457296941e10cbb2a49a571ca7a2c16e5a/regex-2026.2.19-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b9ab8dec42afefa6314ea9b31b188259ffdd93f433d77cad454cd0b8d235ce1c", size = 858818, upload-time = "2026-02-19T19:01:40.409Z" }, - { url = "https://files.pythonhosted.org/packages/1e/eb/0bc2b01a6b0b264e1406e5ef11cae3f634c3bd1a6e61206fd3227ce8e89c/regex-2026.2.19-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:294c0fb2e87c6bcc5f577c8f609210f5700b993151913352ed6c6af42f30f95f", size = 764186, upload-time = "2026-02-19T19:01:43.009Z" }, - { url = "https://files.pythonhosted.org/packages/eb/37/5fe5a630d0d99ecf0c3570f8905dafbc160443a2d80181607770086c9812/regex-2026.2.19-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c0924c64b082d4512b923ac016d6e1dcf647a3560b8a4c7e55cbbd13656cb4ed", size = 850363, upload-time = "2026-02-19T19:01:45.015Z" }, - { url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a", size = 790026, upload-time = "2026-02-19T19:01:47.51Z" }, - { url = "https://files.pythonhosted.org/packages/d6/3a/40d3b66923dfc5aeba182f194f0ca35d09afe8c031a193e6ae46971a0a0e/regex-2026.2.19-cp313-cp313-win32.whl", hash = "sha256:43cdde87006271be6963896ed816733b10967baaf0e271d529c82e93da66675b", size = 266372, upload-time = "2026-02-19T19:01:49.469Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f2/39082e8739bfd553497689e74f9d5e5bb531d6f8936d0b94f43e18f219c0/regex-2026.2.19-cp313-cp313-win_amd64.whl", hash = "sha256:127ea69273485348a126ebbf3d6052604d3c7da284f797bba781f364c0947d47", size = 277253, upload-time = "2026-02-19T19:01:51.208Z" }, - { url 
= "https://files.pythonhosted.org/packages/c2/c2/852b9600d53fb47e47080c203e2cdc0ac7e84e37032a57e0eaa37446033a/regex-2026.2.19-cp313-cp313-win_arm64.whl", hash = "sha256:5e56c669535ac59cbf96ca1ece0ef26cb66809990cda4fa45e1e32c3b146599e", size = 270505, upload-time = "2026-02-19T19:01:52.865Z" }, - { url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9", size = 492202, upload-time = "2026-02-19T19:01:54.816Z" }, - { url = "https://files.pythonhosted.org/packages/24/b5/b84fec8cbb5f92a7eed2b6b5353a6a9eed9670fee31817c2da9eb85dc797/regex-2026.2.19-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:80caaa1ddcc942ec7be18427354f9d58a79cee82dea2a6b3d4fd83302e1240d7", size = 292884, upload-time = "2026-02-19T19:01:58.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60", size = 291236, upload-time = "2026-02-19T19:01:59.966Z" }, - { url = "https://files.pythonhosted.org/packages/f2/f7/bda2695134f3e63eb5cccbbf608c2a12aab93d261ff4e2fe49b47fabc948/regex-2026.2.19-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5100acb20648d9efd3f4e7e91f51187f95f22a741dcd719548a6cf4e1b34b3f", size = 807660, upload-time = "2026-02-19T19:02:01.632Z" }, - { url = "https://files.pythonhosted.org/packages/11/56/6e3a4bf5e60d17326b7003d91bbde8938e439256dec211d835597a44972d/regex-2026.2.19-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5e3a31e94d10e52a896adaa3adf3621bd526ad2b45b8c2d23d1bbe74c7423007", size = 873585, upload-time = "2026-02-19T19:02:03.522Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/5e/c90c6aa4d1317cc11839359479cfdd2662608f339e84e81ba751c8a4e461/regex-2026.2.19-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8497421099b981f67c99eba4154cf0dfd8e47159431427a11cfb6487f7791d9e", size = 915243, upload-time = "2026-02-19T19:02:05.608Z" }, - { url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619", size = 812922, upload-time = "2026-02-19T19:02:08.115Z" }, - { url = "https://files.pythonhosted.org/packages/2d/be/9eda82afa425370ffdb3fa9f3ea42450b9ae4da3ff0a4ec20466f69e371b/regex-2026.2.19-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8abe671cf0f15c26b1ad389bf4043b068ce7d3b1c5d9313e12895f57d6738555", size = 781318, upload-time = "2026-02-19T19:02:10.072Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d5/50f0bbe56a8199f60a7b6c714e06e54b76b33d31806a69d0703b23ce2a9e/regex-2026.2.19-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5a8f28dd32a4ce9c41758d43b5b9115c1c497b4b1f50c457602c1d571fa98ce1", size = 795649, upload-time = "2026-02-19T19:02:11.96Z" }, - { url = "https://files.pythonhosted.org/packages/c5/09/d039f081e44a8b0134d0bb2dd805b0ddf390b69d0b58297ae098847c572f/regex-2026.2.19-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:654dc41a5ba9b8cc8432b3f1aa8906d8b45f3e9502442a07c2f27f6c63f85db5", size = 868844, upload-time = "2026-02-19T19:02:14.043Z" }, - { url = "https://files.pythonhosted.org/packages/ef/53/e2903b79a19ec8557fe7cd21cd093956ff2dbc2e0e33969e3adbe5b184dd/regex-2026.2.19-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4a02faea614e7fdd6ba8b3bec6c8e79529d356b100381cec76e638f45d12ca04", size = 770113, upload-time = "2026-02-19T19:02:16.161Z" }, - { url 
= "https://files.pythonhosted.org/packages/8f/e2/784667767b55714ebb4e59bf106362327476b882c0b2f93c25e84cc99b1a/regex-2026.2.19-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d96162140bb819814428800934c7b71b7bffe81fb6da2d6abc1dcca31741eca3", size = 854922, upload-time = "2026-02-19T19:02:18.155Z" }, - { url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743", size = 799636, upload-time = "2026-02-19T19:02:20.04Z" }, - { url = "https://files.pythonhosted.org/packages/cf/54/fcfc9287f20c5c9bd8db755aafe3e8cf4d99a6a3f1c7162ee182e0ca9374/regex-2026.2.19-cp313-cp313t-win32.whl", hash = "sha256:a178df8ec03011153fbcd2c70cb961bc98cbbd9694b28f706c318bee8927c3db", size = 268968, upload-time = "2026-02-19T19:02:22.816Z" }, - { url = "https://files.pythonhosted.org/packages/1e/a0/ff24c6cb1273e42472706d277147fc38e1f9074a280fb6034b0fc9b69415/regex-2026.2.19-cp313-cp313t-win_amd64.whl", hash = "sha256:2c1693ca6f444d554aa246b592355b5cec030ace5a2729eae1b04ab6e853e768", size = 280390, upload-time = "2026-02-19T19:02:25.231Z" }, - { url = "https://files.pythonhosted.org/packages/1a/b6/a3f6ad89d780ffdeebb4d5e2e3e30bd2ef1f70f6a94d1760e03dd1e12c60/regex-2026.2.19-cp313-cp313t-win_arm64.whl", hash = "sha256:c0761d7ae8d65773e01515ebb0b304df1bf37a0a79546caad9cbe79a42c12af7", size = 271643, upload-time = "2026-02-19T19:02:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e2/7ad4e76a6dddefc0d64dbe12a4d3ca3947a19ddc501f864a5df2a8222ddd/regex-2026.2.19-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:03d191a9bcf94d31af56d2575210cb0d0c6a054dbcad2ea9e00aa4c42903b919", size = 489306, upload-time = "2026-02-19T19:02:29.058Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/95/ee1736135733afbcf1846c58671046f99c4d5170102a150ebb3dd8d701d9/regex-2026.2.19-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:516ee067c6c721d0d0bfb80a2004edbd060fffd07e456d4e1669e38fe82f922e", size = 291218, upload-time = "2026-02-19T19:02:31.083Z" }, - { url = "https://files.pythonhosted.org/packages/ef/08/180d1826c3d7065200a5168c6b993a44947395c7bb6e04b2c2a219c34225/regex-2026.2.19-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:997862c619994c4a356cb7c3592502cbd50c2ab98da5f61c5c871f10f22de7e5", size = 289097, upload-time = "2026-02-19T19:02:33.485Z" }, - { url = "https://files.pythonhosted.org/packages/28/93/0651924c390c5740f5f896723f8ddd946a6c63083a7d8647231c343912ff/regex-2026.2.19-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b9e1b8a7ebe2807cd7bbdf662510c8e43053a23262b9f46ad4fc2dfc9d204e", size = 799147, upload-time = "2026-02-19T19:02:35.669Z" }, - { url = "https://files.pythonhosted.org/packages/a7/00/2078bd8bcd37d58a756989adbfd9f1d0151b7ca4085a9c2a07e917fbac61/regex-2026.2.19-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6c8fb3b19652e425ff24169dad3ee07f99afa7996caa9dfbb3a9106cd726f49a", size = 865239, upload-time = "2026-02-19T19:02:38.012Z" }, - { url = "https://files.pythonhosted.org/packages/2a/13/75195161ec16936b35a365fa8c1dd2ab29fd910dd2587765062b174d8cfc/regex-2026.2.19-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50f1ee9488dd7a9fda850ec7c68cad7a32fa49fd19733f5403a3f92b451dcf73", size = 911904, upload-time = "2026-02-19T19:02:40.737Z" }, - { url = "https://files.pythonhosted.org/packages/96/72/ac42f6012179343d1c4bd0ffee8c948d841cb32ea188d37e96d80527fcc9/regex-2026.2.19-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab780092b1424d13200aa5a62996e95f65ee3db8509be366437439cdc0af1a9f", size = 
803518, upload-time = "2026-02-19T19:02:42.923Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d1/75a08e2269b007b9783f0f86aa64488e023141219cb5f14dc1e69cda56c6/regex-2026.2.19-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:17648e1a88e72d88641b12635e70e6c71c5136ba14edba29bf8fc6834005a265", size = 775866, upload-time = "2026-02-19T19:02:45.189Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/70e7d05faf6994c2ca7a9fcaa536da8f8e4031d45b0ec04b57040ede201f/regex-2026.2.19-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f914ae8c804c8a8a562fe216100bc156bfb51338c1f8d55fe32cf407774359a", size = 788224, upload-time = "2026-02-19T19:02:47.804Z" }, - { url = "https://files.pythonhosted.org/packages/c8/83/34a2dd601f9deb13c20545c674a55f4a05c90869ab73d985b74d639bac43/regex-2026.2.19-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c7e121a918bbee3f12ac300ce0a0d2f2c979cf208fb071ed8df5a6323281915c", size = 859682, upload-time = "2026-02-19T19:02:50.583Z" }, - { url = "https://files.pythonhosted.org/packages/8e/30/136db9a09a7f222d6e48b806f3730e7af6499a8cad9c72ac0d49d52c746e/regex-2026.2.19-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2fedd459c791da24914ecc474feecd94cf7845efb262ac3134fe27cbd7eda799", size = 764223, upload-time = "2026-02-19T19:02:52.777Z" }, - { url = "https://files.pythonhosted.org/packages/9e/ea/bb947743c78a16df481fa0635c50aa1a439bb80b0e6dc24cd4e49c716679/regex-2026.2.19-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ea8dfc99689240e61fb21b5fc2828f68b90abf7777d057b62d3166b7c1543c4c", size = 850101, upload-time = "2026-02-19T19:02:55.87Z" }, - { url = "https://files.pythonhosted.org/packages/25/27/e3bfe6e97a99f7393665926be02fef772da7f8aa59e50bc3134e4262a032/regex-2026.2.19-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fff45852160960f29e184ec8a5be5ab4063cfd0b168d439d1fc4ac3744bf29e", size = 789904, upload-time = "2026-02-19T19:02:58.523Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/7b/7e2be6f00cea59d08761b027ad237002e90cac74b1607200ebaa2ba3d586/regex-2026.2.19-cp314-cp314-win32.whl", hash = "sha256:5390b130cce14a7d1db226a3896273b7b35be10af35e69f1cca843b6e5d2bb2d", size = 271784, upload-time = "2026-02-19T19:03:00.418Z" }, - { url = "https://files.pythonhosted.org/packages/f7/f6/639911530335773e7ec60bcaa519557b719586024c1d7eaad1daf87b646b/regex-2026.2.19-cp314-cp314-win_amd64.whl", hash = "sha256:e581f75d5c0b15669139ca1c2d3e23a65bb90e3c06ba9d9ea194c377c726a904", size = 280506, upload-time = "2026-02-19T19:03:02.302Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ec/2582b56b4e036d46bb9b5d74a18548439ffa16c11cf59076419174d80f48/regex-2026.2.19-cp314-cp314-win_arm64.whl", hash = "sha256:7187fdee1be0896c1499a991e9bf7c78e4b56b7863e7405d7bb687888ac10c4b", size = 273557, upload-time = "2026-02-19T19:03:04.836Z" }, - { url = "https://files.pythonhosted.org/packages/49/0b/f901cfeb4efd83e4f5c3e9f91a6de77e8e5ceb18555698aca3a27e215ed3/regex-2026.2.19-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:5ec1d7c080832fdd4e150c6f5621fe674c70c63b3ae5a4454cebd7796263b175", size = 492196, upload-time = "2026-02-19T19:03:08.188Z" }, - { url = "https://files.pythonhosted.org/packages/94/0a/349b959e3da874e15eda853755567b4cde7e5309dbb1e07bfe910cfde452/regex-2026.2.19-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8457c1bc10ee9b29cdfd897ccda41dce6bde0e9abd514bcfef7bcd05e254d411", size = 292878, upload-time = "2026-02-19T19:03:10.272Z" }, - { url = "https://files.pythonhosted.org/packages/98/b0/9d81b3c2c5ddff428f8c506713737278979a2c476f6e3675a9c51da0c389/regex-2026.2.19-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cce8027010d1ffa3eb89a0b19621cdc78ae548ea2b49fea1f7bfb3ea77064c2b", size = 291235, upload-time = "2026-02-19T19:03:12.5Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/e7/be7818df8691dbe9508c381ea2cc4c1153e4fdb1c4b06388abeaa93bd712/regex-2026.2.19-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11c138febb40546ff9e026dbbc41dc9fb8b29e61013fa5848ccfe045f5b23b83", size = 807893, upload-time = "2026-02-19T19:03:15.064Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b6/b898a8b983190cfa0276031c17beb73cfd1db07c03c8c37f606d80b655e2/regex-2026.2.19-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:74ff212aa61532246bb3036b3dfea62233414b0154b8bc3676975da78383cac3", size = 873696, upload-time = "2026-02-19T19:03:17.848Z" }, - { url = "https://files.pythonhosted.org/packages/1a/98/126ba671d54f19080ec87cad228fb4f3cc387fff8c4a01cb4e93f4ff9d94/regex-2026.2.19-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d00c95a2b6bfeb3ea1cb68d1751b1dfce2b05adc2a72c488d77a780db06ab867", size = 915493, upload-time = "2026-02-19T19:03:20.343Z" }, - { url = "https://files.pythonhosted.org/packages/b2/10/550c84a1a1a7371867fe8be2bea7df55e797cbca4709974811410e195c5d/regex-2026.2.19-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:311fcccb76af31be4c588d5a17f8f1a059ae8f4b097192896ebffc95612f223a", size = 813094, upload-time = "2026-02-19T19:03:23.287Z" }, - { url = "https://files.pythonhosted.org/packages/29/fb/ba221d2fc76a27b6b7d7a60f73a7a6a7bac21c6ba95616a08be2bcb434b0/regex-2026.2.19-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77cfd6b5e7c4e8bf7a39d243ea05882acf5e3c7002b0ef4756de6606893b0ecd", size = 781583, upload-time = "2026-02-19T19:03:26.872Z" }, - { url = "https://files.pythonhosted.org/packages/26/f1/af79231301297c9e962679efc04a31361b58dc62dec1fc0cb4b8dd95956a/regex-2026.2.19-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:6380f29ff212ec922b6efb56100c089251940e0526a0d05aa7c2d9b571ddf2fe", size = 795875, upload-time = "2026-02-19T19:03:29.223Z" }, - { url = "https://files.pythonhosted.org/packages/a0/90/1e1d76cb0a2d0a4f38a039993e1c5cd971ae50435d751c5bae4f10e1c302/regex-2026.2.19-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:655f553a1fa3ab8a7fd570eca793408b8d26a80bfd89ed24d116baaf13a38969", size = 868916, upload-time = "2026-02-19T19:03:31.415Z" }, - { url = "https://files.pythonhosted.org/packages/9a/67/a1c01da76dbcfed690855a284c665cc0a370e7d02d1bd635cf9ff7dd74b8/regex-2026.2.19-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:015088b8558502f1f0bccd58754835aa154a7a5b0bd9d4c9b7b96ff4ae9ba876", size = 770386, upload-time = "2026-02-19T19:03:33.972Z" }, - { url = "https://files.pythonhosted.org/packages/49/6f/94842bf294f432ff3836bfd91032e2ecabea6d284227f12d1f935318c9c4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9e6693b8567a59459b5dda19104c4a4dbbd4a1c78833eacc758796f2cfef1854", size = 855007, upload-time = "2026-02-19T19:03:36.238Z" }, - { url = "https://files.pythonhosted.org/packages/ff/93/393cd203ca0d1d368f05ce12d2c7e91a324bc93c240db2e6d5ada05835f4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4071209fd4376ab5ceec72ad3507e9d3517c59e38a889079b98916477a871868", size = 799863, upload-time = "2026-02-19T19:03:38.497Z" }, - { url = "https://files.pythonhosted.org/packages/43/d9/35afda99bd92bf1a5831e55a4936d37ea4bed6e34c176a3c2238317faf4f/regex-2026.2.19-cp314-cp314t-win32.whl", hash = "sha256:2905ff4a97fad42f2d0834d8b1ea3c2f856ec209837e458d71a061a7d05f9f01", size = 274742, upload-time = "2026-02-19T19:03:40.804Z" }, - { url = "https://files.pythonhosted.org/packages/ae/42/7edc3344dcc87b698e9755f7f685d463852d481302539dae07135202d3ca/regex-2026.2.19-cp314-cp314t-win_amd64.whl", hash = "sha256:64128549b600987e0f335c2365879895f860a9161f283b14207c800a6ed623d3", size = 284443, upload-time = "2026-02-19T19:03:42.954Z" }, 
- { url = "https://files.pythonhosted.org/packages/3a/45/affdf2d851b42adf3d13fc5b3b059372e9bd299371fd84cf5723c45871fa/regex-2026.2.19-cp314-cp314t-win_arm64.whl", hash = "sha256:a09ae430e94c049dc6957f6baa35ee3418a3a77f3c12b6e02883bd80a2b679b0", size = 274932, upload-time = "2026-02-19T19:03:45.488Z" }, + { url = "https://files.pythonhosted.org/packages/70/b8/845a927e078f5e5cc55d29f57becbfde0003d52806544531ab3f2da4503c/regex-2026.2.28-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fc48c500838be6882b32748f60a15229d2dea96e59ef341eaa96ec83538f498d", size = 488461, upload-time = "2026-02-28T02:15:48.405Z" }, + { url = "https://files.pythonhosted.org/packages/32/f9/8a0034716684e38a729210ded6222249f29978b24b684f448162ef21f204/regex-2026.2.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2afa673660928d0b63d84353c6c08a8a476ddfc4a47e11742949d182e6863ce8", size = 290774, upload-time = "2026-02-28T02:15:51.738Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ba/b27feefffbb199528dd32667cd172ed484d9c197618c575f01217fbe6103/regex-2026.2.28-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7ab218076eb0944549e7fe74cf0e2b83a82edb27e81cc87411f76240865e04d5", size = 288737, upload-time = "2026-02-28T02:15:53.534Z" }, + { url = "https://files.pythonhosted.org/packages/18/c5/65379448ca3cbfe774fcc33774dc8295b1ee97dc3237ae3d3c7b27423c9d/regex-2026.2.28-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94d63db12e45a9b9f064bfe4800cefefc7e5f182052e4c1b774d46a40ab1d9bb", size = 782675, upload-time = "2026-02-28T02:15:55.488Z" }, + { url = "https://files.pythonhosted.org/packages/aa/30/6fa55bef48090f900fbd4649333791fc3e6467380b9e775e741beeb3231f/regex-2026.2.28-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:195237dc327858a7721bf8b0bbbef797554bc13563c3591e91cd0767bacbe359", size = 850514, upload-time = "2026-02-28T02:15:57.509Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/28/9ca180fb3787a54150209754ac06a42409913571fa94994f340b3bba4e1e/regex-2026.2.28-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b387a0d092dac157fb026d737dde35ff3e49ef27f285343e7c6401851239df27", size = 896612, upload-time = "2026-02-28T02:15:59.682Z" }, + { url = "https://files.pythonhosted.org/packages/46/b5/f30d7d3936d6deecc3ea7bea4f7d3c5ee5124e7c8de372226e436b330a55/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3935174fa4d9f70525a4367aaff3cb8bc0548129d114260c29d9dfa4a5b41692", size = 791691, upload-time = "2026-02-28T02:16:01.752Z" }, + { url = "https://files.pythonhosted.org/packages/f5/34/96631bcf446a56ba0b2a7f684358a76855dfe315b7c2f89b35388494ede0/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b2b23587b26496ff5fd40df4278becdf386813ec00dc3533fa43a4cf0e2ad3c", size = 783111, upload-time = "2026-02-28T02:16:03.651Z" }, + { url = "https://files.pythonhosted.org/packages/39/54/f95cb7a85fe284d41cd2f3625e0f2ae30172b55dfd2af1d9b4eaef6259d7/regex-2026.2.28-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3b24bd7e9d85dc7c6a8bd2aa14ecd234274a0248335a02adeb25448aecdd420d", size = 767512, upload-time = "2026-02-28T02:16:05.616Z" }, + { url = "https://files.pythonhosted.org/packages/3d/af/a650f64a79c02a97f73f64d4e7fc4cc1984e64affab14075e7c1f9a2db34/regex-2026.2.28-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd477d5f79920338107f04aa645f094032d9e3030cc55be581df3d1ef61aa318", size = 773920, upload-time = "2026-02-28T02:16:08.325Z" }, + { url = "https://files.pythonhosted.org/packages/72/f8/3f9c2c2af37aedb3f5a1e7227f81bea065028785260d9cacc488e43e6997/regex-2026.2.28-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b49eb78048c6354f49e91e4b77da21257fecb92256b6d599ae44403cab30b05b", size = 846681, upload-time = 
"2026-02-28T02:16:10.381Z" }, + { url = "https://files.pythonhosted.org/packages/54/12/8db04a334571359f4d127d8f89550917ec6561a2fddfd69cd91402b47482/regex-2026.2.28-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:a25c7701e4f7a70021db9aaf4a4a0a67033c6318752146e03d1b94d32006217e", size = 755565, upload-time = "2026-02-28T02:16:11.972Z" }, + { url = "https://files.pythonhosted.org/packages/da/bc/91c22f384d79324121b134c267a86ca90d11f8016aafb1dc5bee05890ee3/regex-2026.2.28-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9dd450db6458387167e033cfa80887a34c99c81d26da1bf8b0b41bf8c9cac88e", size = 835789, upload-time = "2026-02-28T02:16:14.036Z" }, + { url = "https://files.pythonhosted.org/packages/46/a7/4cc94fd3af01dcfdf5a9ed75c8e15fd80fcd62cc46da7592b1749e9c35db/regex-2026.2.28-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2954379dd20752e82d22accf3ff465311cbb2bac6c1f92c4afd400e1757f7451", size = 780094, upload-time = "2026-02-28T02:16:15.468Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/e5a38f420af3c77cab4a65f0c3a55ec02ac9babf04479cfd282d356988a6/regex-2026.2.28-cp310-cp310-win32.whl", hash = "sha256:1f8b17be5c27a684ea6759983c13506bd77bfc7c0347dff41b18ce5ddd2ee09a", size = 266025, upload-time = "2026-02-28T02:16:16.828Z" }, + { url = "https://files.pythonhosted.org/packages/4d/0a/205c4c1466a36e04d90afcd01d8908bac327673050c7fe316b2416d99d3d/regex-2026.2.28-cp310-cp310-win_amd64.whl", hash = "sha256:dd8847c4978bc3c7e6c826fb745f5570e518b8459ac2892151ce6627c7bc00d5", size = 277965, upload-time = "2026-02-28T02:16:18.752Z" }, + { url = "https://files.pythonhosted.org/packages/c3/4d/29b58172f954b6ec2c5ed28529a65e9026ab96b4b7016bcd3858f1c31d3c/regex-2026.2.28-cp310-cp310-win_arm64.whl", hash = "sha256:73cdcdbba8028167ea81490c7f45280113e41db2c7afb65a276f4711fa3bcbff", size = 270336, upload-time = "2026-02-28T02:16:20.735Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/db/8cbfd0ba3f302f2d09dd0019a9fcab74b63fee77a76c937d0e33161fb8c1/regex-2026.2.28-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e621fb7c8dc147419b28e1702f58a0177ff8308a76fa295c71f3e7827849f5d9", size = 488462, upload-time = "2026-02-28T02:16:22.616Z" }, + { url = "https://files.pythonhosted.org/packages/5d/10/ccc22c52802223f2368731964ddd117799e1390ffc39dbb31634a83022ee/regex-2026.2.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d5bef2031cbf38757a0b0bc4298bb4824b6332d28edc16b39247228fbdbad97", size = 290774, upload-time = "2026-02-28T02:16:23.993Z" }, + { url = "https://files.pythonhosted.org/packages/62/b9/6796b3bf3101e64117201aaa3a5a030ec677ecf34b3cd6141b5d5c6c67d5/regex-2026.2.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bcb399ed84eabf4282587ba151f2732ad8168e66f1d3f85b1d038868fe547703", size = 288724, upload-time = "2026-02-28T02:16:25.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/02/291c0ae3f3a10cea941d0f5366da1843d8d1fa8a25b0671e20a0e454bb38/regex-2026.2.28-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c1b34dfa72f826f535b20712afa9bb3ba580020e834f3c69866c5bddbf10098", size = 791924, upload-time = "2026-02-28T02:16:26.863Z" }, + { url = "https://files.pythonhosted.org/packages/0f/57/f0235cc520d9672742196c5c15098f8f703f2758d48d5a7465a56333e496/regex-2026.2.28-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:851fa70df44325e1e4cdb79c5e676e91a78147b1b543db2aec8734d2add30ec2", size = 860095, upload-time = "2026-02-28T02:16:28.772Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7c/393c94cbedda79a0f5f2435ebd01644aba0b338d327eb24b4aa5b8d6c07f/regex-2026.2.28-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:516604edd17b1c2c3e579cf4e9b25a53bf8fa6e7cedddf1127804d3e0140ca64", size = 906583, upload-time = 
"2026-02-28T02:16:30.977Z" }, + { url = "https://files.pythonhosted.org/packages/2c/73/a72820f47ca5abf2b5d911d0407ba5178fc52cf9780191ed3a54f5f419a2/regex-2026.2.28-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7ce83654d1ab701cb619285a18a8e5a889c1216d746ddc710c914ca5fd71022", size = 800234, upload-time = "2026-02-28T02:16:32.55Z" }, + { url = "https://files.pythonhosted.org/packages/34/b3/6e6a4b7b31fa998c4cf159a12cbeaf356386fbd1a8be743b1e80a3da51e4/regex-2026.2.28-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2791948f7c70bb9335a9102df45e93d428f4b8128020d85920223925d73b9e1", size = 772803, upload-time = "2026-02-28T02:16:34.029Z" }, + { url = "https://files.pythonhosted.org/packages/10/e7/5da0280c765d5a92af5e1cd324b3fe8464303189cbaa449de9a71910e273/regex-2026.2.28-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:03a83cc26aa2acda6b8b9dfe748cf9e84cbd390c424a1de34fdcef58961a297a", size = 781117, upload-time = "2026-02-28T02:16:36.253Z" }, + { url = "https://files.pythonhosted.org/packages/76/39/0b8d7efb256ae34e1b8157acc1afd8758048a1cf0196e1aec2e71fd99f4b/regex-2026.2.28-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ec6f5674c5dc836994f50f1186dd1fafde4be0666aae201ae2fcc3d29d8adf27", size = 854224, upload-time = "2026-02-28T02:16:38.119Z" }, + { url = "https://files.pythonhosted.org/packages/21/ff/a96d483ebe8fe6d1c67907729202313895d8de8495569ec319c6f29d0438/regex-2026.2.28-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:50c2fc924749543e0eacc93ada6aeeb3ea5f6715825624baa0dccaec771668ae", size = 761898, upload-time = "2026-02-28T02:16:40.333Z" }, + { url = "https://files.pythonhosted.org/packages/89/bd/d4f2e75cb4a54b484e796017e37c0d09d8a0a837de43d17e238adf163f4e/regex-2026.2.28-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ba55c50f408fb5c346a3a02d2ce0ebc839784e24f7c9684fde328ff063c3cdea", size = 844832, upload-time = "2026-02-28T02:16:41.875Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/a7/428a135cf5e15e4e11d1e696eb2bf968362f8ea8a5f237122e96bc2ae950/regex-2026.2.28-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:edb1b1b3a5576c56f08ac46f108c40333f222ebfd5cf63afdfa3aab0791ebe5b", size = 788347, upload-time = "2026-02-28T02:16:43.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/59/68691428851cf9c9c3707217ab1d9b47cfeec9d153a49919e6c368b9e926/regex-2026.2.28-cp311-cp311-win32.whl", hash = "sha256:948c12ef30ecedb128903c2c2678b339746eb7c689c5c21957c4a23950c96d15", size = 266033, upload-time = "2026-02-28T02:16:45.094Z" }, + { url = "https://files.pythonhosted.org/packages/42/8b/1483de1c57024e89296cbcceb9cccb3f625d416ddb46e570be185c9b05a9/regex-2026.2.28-cp311-cp311-win_amd64.whl", hash = "sha256:fd63453f10d29097cc3dc62d070746523973fb5aa1c66d25f8558bebd47fed61", size = 277978, upload-time = "2026-02-28T02:16:46.75Z" }, + { url = "https://files.pythonhosted.org/packages/a4/36/abec45dc6e7252e3dbc797120496e43bb5730a7abf0d9cb69340696a2f2d/regex-2026.2.28-cp311-cp311-win_arm64.whl", hash = "sha256:00f2b8d9615aa165fdff0a13f1a92049bfad555ee91e20d246a51aa0b556c60a", size = 270340, upload-time = "2026-02-28T02:16:48.626Z" }, + { url = "https://files.pythonhosted.org/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" }, + { url = "https://files.pythonhosted.org/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" }, + { url = "https://files.pythonhosted.org/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = "2026-02-28T02:17:00.918Z" }, + { url = "https://files.pythonhosted.org/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" }, + { url = "https://files.pythonhosted.org/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" }, + { url = "https://files.pythonhosted.org/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 856846, upload-time = "2026-02-28T02:17:09.11Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" }, + { url = "https://files.pythonhosted.org/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/abc706c1fb03b4580a09645b206a3fc032f5a9f457bc1a8038ac555658ab/regex-2026.2.28-cp312-cp312-win32.whl", hash = "sha256:f8ed9a5d4612df9d4de15878f0bc6aa7a268afbe5af21a3fdd97fa19516e978c", size = 266416, upload-time = "2026-02-28T02:17:17.15Z" 
}, + { url = "https://files.pythonhosted.org/packages/fa/06/2a6f7dff190e5fa9df9fb4acf2fdf17a1aa0f7f54596cba8de608db56b3a/regex-2026.2.28-cp312-cp312-win_amd64.whl", hash = "sha256:01d65fd24206c8e1e97e2e31b286c59009636c022eb5d003f52760b0f42155d4", size = 277297, upload-time = "2026-02-28T02:17:18.723Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f0/58a2484851fadf284458fdbd728f580d55c1abac059ae9f048c63b92f427/regex-2026.2.28-cp312-cp312-win_arm64.whl", hash = "sha256:c0b5ccbb8ffb433939d248707d4a8b31993cb76ab1a0187ca886bf50e96df952", size = 270408, upload-time = "2026-02-28T02:17:20.328Z" }, + { url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784", size = 489311, upload-time = "2026-02-28T02:17:22.591Z" }, + { url = "https://files.pythonhosted.org/packages/95/c8/c20390f2232d3f7956f420f4ef1852608ad57aa26c3dd78516cb9f3dc913/regex-2026.2.28-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e59bc8f30414d283ae8ee1617b13d8112e7135cb92830f0ec3688cb29152585a", size = 291285, upload-time = "2026-02-28T02:17:24.355Z" }, + { url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d", size = 289051, upload-time = "2026-02-28T02:17:26.722Z" }, + { url = "https://files.pythonhosted.org/packages/1d/1b/7cc3b7af4c244c204b7a80924bd3d85aecd9ba5bc82b485c5806ee8cda9e/regex-2026.2.28-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb4db2f17e6484904f986c5a657cec85574c76b5c5e61c7aae9ffa1bc6224f95", size = 796842, upload-time = "2026-02-28T02:17:29.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/87/26bd03efc60e0d772ac1e7b60a2e6325af98d974e2358f659c507d3c76db/regex-2026.2.28-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52b017b35ac2214d0db5f4f90e303634dc44e4aba4bd6235a27f97ecbe5b0472", size = 863083, upload-time = "2026-02-28T02:17:31.363Z" }, + { url = "https://files.pythonhosted.org/packages/ae/54/aeaf4afb1aa0a65e40de52a61dc2ac5b00a83c6cb081c8a1d0dda74f3010/regex-2026.2.28-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69fc560ccbf08a09dc9b52ab69cacfae51e0ed80dc5693078bdc97db2f91ae96", size = 909412, upload-time = "2026-02-28T02:17:33.248Z" }, + { url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92", size = 802101, upload-time = "2026-02-28T02:17:35.053Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/512fb9ff7f5b15ea204bb1967ebb649059446decacccb201381f9fa6aad4/regex-2026.2.28-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4f5c0b182ad4269e7381b7c27fdb0408399881f7a92a4624fd5487f2971dfc11", size = 775260, upload-time = "2026-02-28T02:17:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/9a92935878aba19bd72706b9db5646a6f993d99b3f6ed42c02ec8beb1d61/regex-2026.2.28-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:96f6269a2882fbb0ee76967116b83679dc628e68eaea44e90884b8d53d833881", size = 784311, upload-time = "2026-02-28T02:17:39.855Z" }, + { url = "https://files.pythonhosted.org/packages/09/d3/fc51a8a738a49a6b6499626580554c9466d3ea561f2b72cfdc72e4149773/regex-2026.2.28-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5acd4b6a95f37c3c3828e5d053a7d4edaedb85de551db0153754924cb7c83e3", size = 856876, 
upload-time = "2026-02-28T02:17:42.317Z" }, + { url = "https://files.pythonhosted.org/packages/08/b7/2e641f3d084b120ca4c52e8c762a78da0b32bf03ef546330db3e2635dc5f/regex-2026.2.28-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2234059cfe33d9813a3677ef7667999caea9eeaa83fef98eb6ce15c6cf9e0215", size = 763632, upload-time = "2026-02-28T02:17:45.073Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6d/0009021d97e79ee99f3d8641f0a8d001eed23479ade4c3125a5480bf3e2d/regex-2026.2.28-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c15af43c72a7fb0c97cbc66fa36a43546eddc5c06a662b64a0cbf30d6ac40944", size = 849320, upload-time = "2026-02-28T02:17:47.192Z" }, + { url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768", size = 790152, upload-time = "2026-02-28T02:17:49.067Z" }, + { url = "https://files.pythonhosted.org/packages/90/3d/a83e2b6b3daa142acb8c41d51de3876186307d5cb7490087031747662500/regex-2026.2.28-cp313-cp313-win32.whl", hash = "sha256:fb66e5245db9652abd7196ace599b04d9c0e4aa7c8f0e2803938377835780081", size = 266398, upload-time = "2026-02-28T02:17:50.744Z" }, + { url = "https://files.pythonhosted.org/packages/85/4f/16e9ebb1fe5425e11b9596c8d57bf8877dcb32391da0bfd33742e3290637/regex-2026.2.28-cp313-cp313-win_amd64.whl", hash = "sha256:71a911098be38c859ceb3f9a9ce43f4ed9f4c6720ad8684a066ea246b76ad9ff", size = 277282, upload-time = "2026-02-28T02:17:53.074Z" }, + { url = "https://files.pythonhosted.org/packages/07/b4/92851335332810c5a89723bf7a7e35c7209f90b7d4160024501717b28cc9/regex-2026.2.28-cp313-cp313-win_arm64.whl", hash = "sha256:39bb5727650b9a0275c6a6690f9bb3fe693a7e6cc5c3155b1240aedf8926423e", size = 270382, upload-time = "2026-02-28T02:17:54.888Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f", size = 492541, upload-time = "2026-02-28T02:17:56.813Z" }, + { url = "https://files.pythonhosted.org/packages/7c/13/55eb22ada7f43d4f4bb3815b6132183ebc331c81bd496e2d1f3b8d862e0d/regex-2026.2.28-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d25a10811de831c2baa6aef3c0be91622f44dd8d31dd12e69f6398efb15e48b", size = 292984, upload-time = "2026-02-28T02:17:58.538Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8", size = 291509, upload-time = "2026-02-28T02:18:00.208Z" }, + { url = "https://files.pythonhosted.org/packages/b5/43/aabe384ec1994b91796e903582427bc2ffaed9c4103819ed3c16d8e749f3/regex-2026.2.28-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd0ce43e71d825b7c0661f9c54d4d74bd97c56c3fd102a8985bcfea48236bacb", size = 809429, upload-time = "2026-02-28T02:18:02.328Z" }, + { url = "https://files.pythonhosted.org/packages/04/b8/8d2d987a816720c4f3109cee7c06a4b24ad0e02d4fc74919ab619e543737/regex-2026.2.28-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00945d007fd74a9084d2ab79b695b595c6b7ba3698972fadd43e23230c6979c1", size = 869422, upload-time = "2026-02-28T02:18:04.23Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ad/2c004509e763c0c3719f97c03eca26473bffb3868d54c5f280b8cd4f9e3d/regex-2026.2.28-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bec23c11cbbf09a4df32fe50d57cbdd777bc442269b6e39a1775654f1c95dee2", size = 915175, upload-time = 
"2026-02-28T02:18:06.791Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a", size = 812044, upload-time = "2026-02-28T02:18:08.736Z" }, + { url = "https://files.pythonhosted.org/packages/5b/ca/feedb7055c62a3f7f659971bf45f0e0a87544b6b0cf462884761453f97c5/regex-2026.2.28-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a448af01e3d8031c89c5d902040b124a5e921a25c4e5e07a861ca591ce429341", size = 782056, upload-time = "2026-02-28T02:18:10.777Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/1aa959ed0d25c1dd7dd5047ea8ba482ceaef38ce363c401fd32a6b923e60/regex-2026.2.28-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:10d28e19bd4888e4abf43bd3925f3c134c52fdf7259219003588a42e24c2aa25", size = 798743, upload-time = "2026-02-28T02:18:13.025Z" }, + { url = "https://files.pythonhosted.org/packages/3b/1f/dadb9cf359004784051c897dcf4d5d79895f73a1bbb7b827abaa4814ae80/regex-2026.2.28-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:99985a2c277dcb9ccb63f937451af5d65177af1efdeb8173ac55b61095a0a05c", size = 864633, upload-time = "2026-02-28T02:18:16.84Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f1/b9a25eb24e1cf79890f09e6ec971ee5b511519f1851de3453bc04f6c902b/regex-2026.2.28-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:e1e7b24cb3ae9953a560c563045d1ba56ee4749fbd05cf21ba571069bd7be81b", size = 770862, upload-time = "2026-02-28T02:18:18.892Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/c5cb10b7aa6f182f9247a30cc9527e326601f46f4df864ac6db588d11fcd/regex-2026.2.28-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d8511a01d0e4ee1992eb3ba19e09bc1866fe03f05129c3aec3fdc4cbc77aad3f", size = 854788, upload-time = "2026-02-28T02:18:21.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550", size = 800184, upload-time = "2026-02-28T02:18:23.492Z" }, + { url = "https://files.pythonhosted.org/packages/69/50/0c7290987f97e7e6830b0d853f69dc4dc5852c934aae63e7fdcd76b4c383/regex-2026.2.28-cp313-cp313t-win32.whl", hash = "sha256:ef77bdde9c9eba3f7fa5b58084b29bbcc74bcf55fdbeaa67c102a35b5bd7e7cc", size = 269137, upload-time = "2026-02-28T02:18:25.375Z" }, + { url = "https://files.pythonhosted.org/packages/68/80/ef26ff90e74ceb4051ad6efcbbb8a4be965184a57e879ebcbdef327d18fa/regex-2026.2.28-cp313-cp313t-win_amd64.whl", hash = "sha256:98adf340100cbe6fbaf8e6dc75e28f2c191b1be50ffefe292fb0e6f6eefdb0d8", size = 280682, upload-time = "2026-02-28T02:18:27.205Z" }, + { url = "https://files.pythonhosted.org/packages/69/8b/fbad9c52e83ffe8f97e3ed1aa0516e6dff6bb633a41da9e64645bc7efdc5/regex-2026.2.28-cp313-cp313t-win_arm64.whl", hash = "sha256:2fb950ac1d88e6b6a9414381f403797b236f9fa17e1eee07683af72b1634207b", size = 271735, upload-time = "2026-02-28T02:18:29.015Z" }, + { url = "https://files.pythonhosted.org/packages/cf/03/691015f7a7cb1ed6dacb2ea5de5682e4858e05a4c5506b2839cd533bbcd6/regex-2026.2.28-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:78454178c7df31372ea737996fb7f36b3c2c92cccc641d251e072478afb4babc", size = 489497, upload-time = "2026-02-28T02:18:30.889Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ba/8db8fd19afcbfa0e1036eaa70c05f20ca8405817d4ad7a38a6b4c2f031ac/regex-2026.2.28-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5d10303dd18cedfd4d095543998404df656088240bcfd3cd20a8f95b861f74bd", size = 291295, upload-time = "2026-02-28T02:18:33.426Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/79/9aa0caf089e8defef9b857b52fc53801f62ff868e19e5c83d4a96612eba1/regex-2026.2.28-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19a9c9e0a8f24f39d575a6a854d516b48ffe4cbdcb9de55cb0570a032556ecff", size = 289275, upload-time = "2026-02-28T02:18:35.247Z" }, + { url = "https://files.pythonhosted.org/packages/eb/26/ee53117066a30ef9c883bf1127eece08308ccf8ccd45c45a966e7a665385/regex-2026.2.28-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09500be324f49b470d907b3ef8af9afe857f5cca486f853853f7945ddbf75911", size = 797176, upload-time = "2026-02-28T02:18:37.15Z" }, + { url = "https://files.pythonhosted.org/packages/05/1b/67fb0495a97259925f343ae78b5d24d4a6624356ae138b57f18bd43006e4/regex-2026.2.28-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fb1c4ff62277d87a7335f2c1ea4e0387b8f2b3ad88a64efd9943906aafad4f33", size = 863813, upload-time = "2026-02-28T02:18:39.478Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/93ac9bbafc53618091c685c7ed40239a90bf9f2a82c983f0baa97cb7ae07/regex-2026.2.28-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b8b3f1be1738feadc69f62daa250c933e85c6f34fa378f54a7ff43807c1b9117", size = 908678, upload-time = "2026-02-28T02:18:41.619Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/a8f5e0561702b25239846a16349feece59712ae20598ebb205580332a471/regex-2026.2.28-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc8ed8c3f41c27acb83f7b6a9eb727a73fc6663441890c5cb3426a5f6a91ce7d", size = 801528, upload-time = "2026-02-28T02:18:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/96/5d/ed6d4cbde80309854b1b9f42d9062fee38ade15f7eb4909f6ef2440403b5/regex-2026.2.28-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:fa539be029844c0ce1114762d2952ab6cfdd7c7c9bd72e0db26b94c3c36dcc5a", size = 775373, upload-time = "2026-02-28T02:18:46.102Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e9/6e53c34e8068b9deec3e87210086ecb5b9efebdefca6b0d3fa43d66dcecb/regex-2026.2.28-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7900157786428a79615a8264dac1f12c9b02957c473c8110c6b1f972dcecaddf", size = 784859, upload-time = "2026-02-28T02:18:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/48/3c/736e1c7ca7f0dcd2ae33819888fdc69058a349b7e5e84bc3e2f296bbf794/regex-2026.2.28-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0b1d2b07614d95fa2bf8a63fd1e98bd8fa2b4848dc91b1efbc8ba219fdd73952", size = 857813, upload-time = "2026-02-28T02:18:50.576Z" }, + { url = "https://files.pythonhosted.org/packages/6e/7c/48c4659ad9da61f58e79dbe8c05223e0006696b603c16eb6b5cbfbb52c27/regex-2026.2.28-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b389c61aa28a79c2e0527ac36da579869c2e235a5b208a12c5b5318cda2501d8", size = 763705, upload-time = "2026-02-28T02:18:52.59Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a1/bc1c261789283128165f71b71b4b221dd1b79c77023752a6074c102f18d8/regex-2026.2.28-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f467cb602f03fbd1ab1908f68b53c649ce393fde056628dc8c7e634dab6bfc07", size = 848734, upload-time = "2026-02-28T02:18:54.595Z" }, + { url = "https://files.pythonhosted.org/packages/10/d8/979407faf1397036e25a5ae778157366a911c0f382c62501009f4957cf86/regex-2026.2.28-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e8c8cb2deba42f5ec1ede46374e990f8adc5e6456a57ac1a261b19be6f28e4e6", size = 789871, upload-time = "2026-02-28T02:18:57.34Z" }, + { url = "https://files.pythonhosted.org/packages/03/23/da716821277115fcb1f4e3de1e5dc5023a1e6533598c486abf5448612579/regex-2026.2.28-cp314-cp314-win32.whl", hash = "sha256:9036b400b20e4858d56d117108d7813ed07bb7803e3eed766675862131135ca6", size = 271825, upload-time = "2026-02-28T02:18:59.202Z" 
}, + { url = "https://files.pythonhosted.org/packages/91/ff/90696f535d978d5f16a52a419be2770a8d8a0e7e0cfecdbfc31313df7fab/regex-2026.2.28-cp314-cp314-win_amd64.whl", hash = "sha256:1d367257cd86c1cbb97ea94e77b373a0bbc2224976e247f173d19e8f18b4afa7", size = 280548, upload-time = "2026-02-28T02:19:01.049Z" }, + { url = "https://files.pythonhosted.org/packages/69/f9/5e1b5652fc0af3fcdf7677e7df3ad2a0d47d669b34ac29a63bb177bb731b/regex-2026.2.28-cp314-cp314-win_arm64.whl", hash = "sha256:5e68192bb3a1d6fb2836da24aa494e413ea65853a21505e142e5b1064a595f3d", size = 273444, upload-time = "2026-02-28T02:19:03.255Z" }, + { url = "https://files.pythonhosted.org/packages/d3/eb/8389f9e940ac89bcf58d185e230a677b4fd07c5f9b917603ad5c0f8fa8fe/regex-2026.2.28-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a5dac14d0872eeb35260a8e30bac07ddf22adc1e3a0635b52b02e180d17c9c7e", size = 492546, upload-time = "2026-02-28T02:19:05.378Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/09441d27ce2a6fa6a61ea3150ea4639c1dcda9b31b2ea07b80d6937b24dd/regex-2026.2.28-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec0c608b7a7465ffadb344ed7c987ff2f11ee03f6a130b569aa74d8a70e8333c", size = 292986, upload-time = "2026-02-28T02:19:07.24Z" }, + { url = "https://files.pythonhosted.org/packages/fb/69/4144b60ed7760a6bd235e4087041f487aa4aa62b45618ce018b0c14833ea/regex-2026.2.28-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7815afb0ca45456613fdaf60ea9c993715511c8d53a83bc468305cbc0ee23c7", size = 291518, upload-time = "2026-02-28T02:19:09.698Z" }, + { url = "https://files.pythonhosted.org/packages/2d/be/77e5426cf5948c82f98c53582009ca9e94938c71f73a8918474f2e2990bb/regex-2026.2.28-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b059e71ec363968671693a78c5053bd9cb2fe410f9b8e4657e88377ebd603a2e", size = 809464, upload-time = "2026-02-28T02:19:12.494Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/99/2c8c5ac90dc7d05c6e7d8e72c6a3599dc08cd577ac476898e91ca787d7f1/regex-2026.2.28-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8cf76f1a29f0e99dcfd7aef1551a9827588aae5a737fe31442021165f1920dc", size = 869553, upload-time = "2026-02-28T02:19:15.151Z" }, + { url = "https://files.pythonhosted.org/packages/53/34/daa66a342f0271e7737003abf6c3097aa0498d58c668dbd88362ef94eb5d/regex-2026.2.28-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:180e08a435a0319e6a4821c3468da18dc7001987e1c17ae1335488dfe7518dd8", size = 915289, upload-time = "2026-02-28T02:19:17.331Z" }, + { url = "https://files.pythonhosted.org/packages/c5/c7/e22c2aaf0a12e7e22ab19b004bb78d32ca1ecc7ef245949935463c5567de/regex-2026.2.28-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e496956106fd59ba6322a8ea17141a27c5040e5ee8f9433ae92d4e5204462a0", size = 812156, upload-time = "2026-02-28T02:19:20.011Z" }, + { url = "https://files.pythonhosted.org/packages/7f/bb/2dc18c1efd9051cf389cd0d7a3a4d90f6804b9fff3a51b5dc3c85b935f71/regex-2026.2.28-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bba2b18d70eeb7b79950f12f633beeecd923f7c9ad6f6bae28e59b4cb3ab046b", size = 782215, upload-time = "2026-02-28T02:19:22.047Z" }, + { url = "https://files.pythonhosted.org/packages/17/1e/9e4ec9b9013931faa32226ec4aa3c71fe664a6d8a2b91ac56442128b332f/regex-2026.2.28-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6db7bfae0f8a2793ff1f7021468ea55e2699d0790eb58ee6ab36ae43aa00bc5b", size = 798925, upload-time = "2026-02-28T02:19:24.173Z" }, + { url = "https://files.pythonhosted.org/packages/71/57/a505927e449a9ccb41e2cc8d735e2abe3444b0213d1cf9cb364a8c1f2524/regex-2026.2.28-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d0b02e8b7e5874b48ae0f077ecca61c1a6a9f9895e9c6dfb191b55b242862033", size = 864701, 
upload-time = "2026-02-28T02:19:26.376Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ad/c62cb60cdd93e13eac5b3d9d6bd5d284225ed0e3329426f94d2552dd7cca/regex-2026.2.28-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:25b6eb660c5cf4b8c3407a1ed462abba26a926cc9965e164268a3267bcc06a43", size = 770899, upload-time = "2026-02-28T02:19:29.38Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5a/874f861f5c3d5ab99633e8030dee1bc113db8e0be299d1f4b07f5b5ec349/regex-2026.2.28-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:5a932ea8ad5d0430351ff9c76c8db34db0d9f53c1d78f06022a21f4e290c5c18", size = 854727, upload-time = "2026-02-28T02:19:31.494Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ca/d2c03b0efde47e13db895b975b2be6a73ed90b8ba963677927283d43bf74/regex-2026.2.28-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1c2c95e1a2b0f89d01e821ff4de1be4b5d73d1f4b0bf679fa27c1ad8d2327f1a", size = 800366, upload-time = "2026-02-28T02:19:34.248Z" }, + { url = "https://files.pythonhosted.org/packages/14/bd/ee13b20b763b8989f7c75d592bfd5de37dc1181814a2a2747fedcf97e3ba/regex-2026.2.28-cp314-cp314t-win32.whl", hash = "sha256:bbb882061f742eb5d46f2f1bd5304055be0a66b783576de3d7eef1bed4778a6e", size = 274936, upload-time = "2026-02-28T02:19:36.313Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/d8020e39414c93af7f0d8688eabcecece44abfd5ce314b21dfda0eebd3d8/regex-2026.2.28-cp314-cp314t-win_amd64.whl", hash = "sha256:6591f281cb44dc13de9585b552cec6fc6cf47fb2fe7a48892295ee9bc4a612f9", size = 284779, upload-time = "2026-02-28T02:19:38.625Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/ad225f4a405827486f1955283407cf758b6d2fb966712644c5f5aef33d1b/regex-2026.2.28-cp314-cp314t-win_arm64.whl", hash = "sha256:dee50f1be42222f89767b64b283283ef963189da0dda4a515aa54a5563c62dec", size = 275010, upload-time = "2026-02-28T02:19:40.65Z" }, ] [[package]] @@ -5877,7 +5992,8 @@ version = "0.4.3" source = { registry = "https://pypi.org/simple" } 
dependencies = [ { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/29/f1/34be702a69a5d272e844c98cee82351f880985cfbca0cc86378011078497/resampy-0.4.3.tar.gz", hash = "sha256:a0d1c28398f0e55994b739650afef4e3974115edbe96cd4bb81968425e916e47", size = 3080604, upload-time = "2024-03-05T20:36:08.119Z" } wheels = [ @@ -5908,16 +6024,16 @@ wheels = [ [[package]] name = "rich-toolkit" -version = "0.19.4" +version = "0.19.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/c9/4bbf4bfee195ed1b7d7a6733cc523ca61dbfb4a3e3c12ea090aaffd97597/rich_toolkit-0.19.4.tar.gz", hash = "sha256:52e23d56f9dc30d1343eb3b3f6f18764c313fbfea24e52e6a1d6069bec9c18eb", size = 193951, upload-time = "2026-02-12T10:08:15.814Z" } +sdist = { url = "https://files.pythonhosted.org/packages/42/ba/dae9e3096651042754da419a4042bc1c75e07d615f9b15066d738838e4df/rich_toolkit-0.19.7.tar.gz", hash = "sha256:133c0915872da91d4c25d85342d5ec1dfacc69b63448af1a08a0d4b4f23ef46e", size = 195877, upload-time = "2026-02-24T16:06:20.555Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/31/97d39719def09c134385bfcfbedfed255168b571e7beb3ad7765aae660ca/rich_toolkit-0.19.4-py3-none-any.whl", hash = "sha256:34ac344de8862801644be8b703e26becf44b047e687f208d7829e8f7cfc311d6", size = 32757, upload-time = "2026-02-12T10:08:15.037Z" }, + { url = "https://files.pythonhosted.org/packages/fb/3c/c923619f6d2f5fafcc96fec0aaf9550a46cd5b6481f06e0c6b66a2a4fed0/rich_toolkit-0.19.7-py3-none-any.whl", hash = 
"sha256:0288e9203728c47c5a4eb60fd2f0692d9df7455a65901ab6f898437a2ba5989d", size = 32963, upload-time = "2026-02-24T16:06:22.066Z" }, ] [[package]] @@ -6186,27 +6302,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.2" +version = "0.15.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" }, - { url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" }, - { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" }, - { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" }, - { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" }, - { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" }, - { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" }, - { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" }, - { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = 
"sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" }, - { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" }, - { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" }, - { url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" }, - { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" }, - { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, + { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" }, + { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" }, + { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" }, + { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" }, ] [[package]] @@ -6271,7 +6387,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", 
version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } wheels = [ @@ -6324,7 +6440,7 @@ wheels = [ [[package]] name = "scipy" -version = "1.17.0" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14'", @@ -6333,70 +6449,70 @@ resolution-markers = [ "python_full_version == '3.11.*'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/4b/c89c131aa87cad2b77a54eb0fb94d633a842420fa7e919dc2f922037c3d8/scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd", size = 31381316, upload-time = "2026-01-10T21:24:33.42Z" }, - { url = "https://files.pythonhosted.org/packages/5e/5f/a6b38f79a07d74989224d5f11b55267714707582908a5f1ae854cf9a9b84/scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558", size = 27966760, upload-time = "2026-01-10T21:24:38.911Z" }, - { url = "https://files.pythonhosted.org/packages/c1/20/095ad24e031ee8ed3c5975954d816b8e7e2abd731e04f8be573de8740885/scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7", size = 20138701, upload-time = "2026-01-10T21:24:43.249Z" }, - { url = "https://files.pythonhosted.org/packages/89/11/4aad2b3858d0337756f3323f8960755704e530b27eb2a94386c970c32cbe/scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6", size = 22480574, upload-time = "2026-01-10T21:24:47.266Z" }, - { url = "https://files.pythonhosted.org/packages/85/bd/f5af70c28c6da2227e510875cadf64879855193a687fb19951f0f44cfd6b/scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042", size = 32862414, upload-time = "2026-01-10T21:24:52.566Z" }, - { url = "https://files.pythonhosted.org/packages/ef/df/df1457c4df3826e908879fe3d76bc5b6e60aae45f4ee42539512438cfd5d/scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4", size = 35112380, upload-time = "2026-01-10T21:24:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/5f/bb/88e2c16bd1dd4de19d80d7c5e238387182993c2fb13b4b8111e3927ad422/scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0", size = 34922676, upload-time = "2026-01-10T21:25:04.287Z" }, - { url = "https://files.pythonhosted.org/packages/02/ba/5120242cc735f71fc002cff0303d536af4405eb265f7c60742851e7ccfe9/scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449", 
size = 37507599, upload-time = "2026-01-10T21:25:09.851Z" }, - { url = "https://files.pythonhosted.org/packages/52/c8/08629657ac6c0da198487ce8cd3de78e02cfde42b7f34117d56a3fe249dc/scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea", size = 36380284, upload-time = "2026-01-10T21:25:15.632Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4a/465f96d42c6f33ad324a40049dfd63269891db9324aa66c4a1c108c6f994/scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = "sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379", size = 24370427, upload-time = "2026-01-10T21:25:20.514Z" }, - { url = "https://files.pythonhosted.org/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580, upload-time = "2026-01-10T21:25:25.717Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012, upload-time = "2026-01-10T21:25:30.921Z" }, - { url = "https://files.pythonhosted.org/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691, upload-time = "2026-01-10T21:25:34.802Z" }, - { url = "https://files.pythonhosted.org/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015, upload-time = "2026-01-10T21:25:39.277Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197, upload-time = "2026-01-10T21:25:44.084Z" }, - { url = "https://files.pythonhosted.org/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148, upload-time = "2026-01-10T21:25:50.591Z" }, - { url = "https://files.pythonhosted.org/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766, upload-time = "2026-01-10T21:25:59.41Z" }, - { url = "https://files.pythonhosted.org/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953, upload-time = "2026-01-10T21:26:07.75Z" }, - { url = "https://files.pythonhosted.org/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121, upload-time = "2026-01-10T21:26:16.509Z" }, - { url = "https://files.pythonhosted.org/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 24314368, upload-time = "2026-01-10T21:26:23.087Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101, upload-time = "2026-01-10T21:26:30.25Z" }, - { url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385, upload-time = "2026-01-10T21:26:36.801Z" }, - { url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115, upload-time = "2026-01-10T21:26:42.107Z" }, - { url = "https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402, upload-time = "2026-01-10T21:26:48.029Z" }, - { url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338, upload-time = "2026-01-10T21:26:55.521Z" }, - { url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201, upload-time = "2026-01-10T21:27:03.501Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384, upload-time = "2026-01-10T21:27:11.423Z" }, - { url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586, upload-time = "2026-01-10T21:27:20.171Z" }, - { url = "https://files.pythonhosted.org/packages/b9/b6/8ac583d6da79e7b9e520579f03007cb006f063642afd6b2eeb16b890bf93/scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812", size = 36287211, upload-time = "2026-01-10T21:28:43.122Z" }, - { url = "https://files.pythonhosted.org/packages/55/fb/7db19e0b3e52f882b420417644ec81dd57eeef1bd1705b6f689d8ff93541/scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2", size = 24312646, upload-time = "2026-01-10T21:28:49.893Z" }, - { url = "https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194, upload-time = "2026-01-10T21:27:27.454Z" }, - { url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415, upload-time = "2026-01-10T21:27:34.26Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232, upload-time = "2026-01-10T21:27:40.306Z" }, - { url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051, upload-time = "2026-01-10T21:27:46.539Z" }, - { url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098, upload-time = "2026-01-10T21:27:54.389Z" }, - { url = "https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342, upload-time = "2026-01-10T21:28:03.012Z" }, - { url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199, upload-time = "2026-01-10T21:28:10.832Z" }, - { url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061, upload-time = "2026-01-10T21:28:19.684Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/cc/2bd59140ed3b2fa2882fb15da0a9cb1b5a6443d67cfd0d98d4cec83a57ec/scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0", size = 36328593, upload-time = "2026-01-10T21:28:28.007Z" }, - { url = "https://files.pythonhosted.org/packages/13/1b/c87cc44a0d2c7aaf0f003aef2904c3d097b422a96c7e7c07f5efd9073c1b/scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67", size = 24625083, upload-time = "2026-01-10T21:28:35.188Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803, upload-time = "2026-01-10T21:28:57.24Z" }, - { url = "https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182, upload-time = "2026-01-10T21:29:04.107Z" }, - { url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125, upload-time = "2026-01-10T21:29:10.179Z" }, - { url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554, upload-time = "2026-01-10T21:29:15.888Z" }, - { url = 
"https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834, upload-time = "2026-01-10T21:29:23.406Z" }, - { url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775, upload-time = "2026-01-10T21:29:31.915Z" }, - { url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240, upload-time = "2026-01-10T21:29:39.995Z" }, - { url = "https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463, upload-time = "2026-01-10T21:29:48.723Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ec/42a6657f8d2d087e750e9a5dde0b481fd135657f09eaf1cf5688bb23c338/scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061", size = 37053015, upload-time = "2026-01-10T21:30:51.418Z" }, - { url = "https://files.pythonhosted.org/packages/27/58/6b89a6afd132787d89a362d443a7bddd511b8f41336a1ae47f9e4f000dc4/scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb", size = 24951312, upload-time = "2026-01-10T21:30:56.771Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502, upload-time = "2026-01-10T21:29:56.326Z" }, - { url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854, upload-time = "2026-01-10T21:30:01.554Z" }, - { url = "https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752, upload-time = "2026-01-10T21:30:05.93Z" }, - { url = "https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972, upload-time = "2026-01-10T21:30:10.651Z" }, - { url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770, upload-time = "2026-01-10T21:30:16.359Z" }, - { url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093, upload-time = "2026-01-10T21:30:22.987Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905, upload-time = "2026-01-10T21:30:28.704Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743, upload-time = "2026-01-10T21:30:34.819Z" }, - { url = "https://files.pythonhosted.org/packages/58/a8/a66a75c3d8f1fb2b83f66007d6455a06a6f6cf5618c3dc35bc9b69dd096e/scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088", size = 37098574, upload-time = "2026-01-10T21:30:40.782Z" }, - { url = "https://files.pythonhosted.org/packages/56/a5/df8f46ef7da168f1bc52cd86e09a9de5c6f19cc1da04454d51b7d4f43408/scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff", size = 25246266, upload-time = "2026-01-10T21:30:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, ] [[package]] @@ -6459,18 +6575,19 @@ wheels = [ [[package]] name = "simli-ai" -version = "2.0.1" +version = "2.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiortc" }, { name = "av" }, { name = "httpx" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/76/b5/6021990871daf9f5b6eb744aff68c83f2c7b257cfd2ee5b9883d0acd9cf4/simli_ai-2.0.1.tar.gz", hash = 
"sha256:1f63eb76900d4dac0c18406a854219e54ebab51acb0c01e245c7a0738dc72413", size = 16104, upload-time = "2026-02-17T12:46:09.743Z" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/8c/fe0697cd371a0f203b915f59e376e1807e4ad79bd53e20ceea57a161f242/simli_ai-2.0.2.tar.gz", hash = "sha256:53b99901fe4c5eeb7637492f70dde34c131ee9e5589bf8781a75494c0469ca03", size = 16422, upload-time = "2026-02-25T11:13:16.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/c1/e7aed0f59d04628c0ac738e2bf8cb6bf020870d1909f3ec9fcf265136663/simli_ai-2.0.1-py3-none-any.whl", hash = "sha256:0a48e38fe289568e56236266843484a1f0e28aca694dd8e2b96610fe40d6c687", size = 19456, upload-time = "2026-02-17T12:46:08.727Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f0/fb6737a87069ed2830d421c7e45cc5c117c8bc7d2183bb37466c0bf6f6ab/simli_ai-2.0.2-py3-none-any.whl", hash = "sha256:023cb8ef37c74f7463810af4595c2e0c2850647e33f9ff9b2ef09d088c0d2403", size = 19914, upload-time = "2026-02-25T11:13:15.257Z" }, ] [[package]] @@ -6484,14 +6601,14 @@ wheels = [ [[package]] name = "smart-open" -version = "7.5.0" +version = "7.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920, upload-time = "2025-11-08T21:38:40.698Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = "sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = 
"sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = "2025-11-08T21:38:39.024Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ea/dcdecd68acebb49d3fd560473a43499b1635076f7f1ae8641c060fe7ce74/smart_open-7.5.1-py3-none-any.whl", hash = "sha256:3e07cbbd9c8a908bcb8e25d48becf1a5cbb4886fa975e9f34c672ed171df2318", size = 64108, upload-time = "2026-02-23T11:01:27.429Z" }, ] [[package]] @@ -6591,7 +6708,8 @@ version = "0.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -6609,7 +6727,8 @@ name = "soxr" version = "0.5.0.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/02/c0/4429bf9b3be10e749149e286aa5c53775399ec62891c6b970456c6dca325/soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73", size = 170853, upload-time = "2024-08-31T03:43:33.058Z" } wheels = [ @@ -6647,7 +6766,8 @@ name = "speechmatics-voice" version = "0.2.8" source = { registry = "https://pypi.org/simple" } 
dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pydantic" }, { name = "speechmatics-rt" }, ] @@ -6790,7 +6910,7 @@ wheels = [ [[package]] name = "sphinx-autodoc-typehints" -version = "3.6.3" +version = "3.8.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14'", @@ -6800,9 +6920,9 @@ resolution-markers = [ dependencies = [ { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/5f/ebcaed1a67e623e4a7622808a8be6b0fd8344313e185f62e85a26b0ce26a/sphinx_autodoc_typehints-3.6.3.tar.gz", hash = "sha256:6c387b47d9ad5e75b157810af5bad46901f0a22708ed5e4adf466885a9c60910", size = 38288, upload-time = "2026-02-18T04:22:08.384Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/89/72f96fe27aa1cfdc882aa6e1309a86b94e4653c1e8acf9b143d34e89c619/sphinx_autodoc_typehints-3.8.0.tar.gz", hash = "sha256:155a30407e88ed3287eeeb1e9156b0ed0ad08c998b0391c652b540563132fd70", size = 59672, upload-time = "2026-02-25T15:00:35.909Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/bd/2b853836d152e40a27655828fdc02c5128f294ac452ad9a13424bb7f92fa/sphinx_autodoc_typehints-3.6.3-py3-none-any.whl", hash = "sha256:46ebc68fa85b320d55887a8d836a01e12e3b7744da973e70af8cedc74072aad5", size = 20882, upload-time = "2026-02-18T04:22:07.238Z" }, + { url = "https://files.pythonhosted.org/packages/8c/0e/36820830c766647d688dfc2b3fda76d76c1cf007eea58fffc1990195aca4/sphinx_autodoc_typehints-3.8.0-py3-none-any.whl", hash = "sha256:f348971f3d88eaee053668b61512e921086b8f0600f1e0887a39bc9476aca51c", size = 32616, upload-time = 
"2026-02-25T15:00:34.749Z" }, ] [[package]] @@ -6909,71 +7029,75 @@ wheels = [ [[package]] name = "sqlalchemy" -version = "2.0.46" +version = "2.0.47" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/aa/9ce0f3e7a9829ead5c8ce549392f33a12c4555a6c0609bb27d882e9c7ddf/sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7", size = 9865393, upload-time = "2026-01-21T18:03:45.119Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/4b/1e00561093fe2cd8eef09d406da003c8a118ff02d6548498c1ae677d68d9/sqlalchemy-2.0.47.tar.gz", hash = "sha256:e3e7feb57b267fe897e492b9721ae46d5c7de6f9e8dee58aacf105dc4e154f3d", size = 9886323, upload-time = "2026-02-24T16:34:27.947Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/26/66ba59328dc25e523bfcb0f8db48bdebe2035e0159d600e1f01c0fc93967/sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735", size = 2155051, upload-time = "2026-01-21T18:27:28.965Z" }, - { url = "https://files.pythonhosted.org/packages/21/cd/9336732941df972fbbfa394db9caa8bb0cf9fe03656ec728d12e9cbd6edc/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39", size = 3234666, upload-time = "2026-01-21T18:32:28.72Z" }, - { url = "https://files.pythonhosted.org/packages/38/62/865ae8b739930ec433cd4123760bee7f8dafdc10abefd725a025604fb0de/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f", size = 3232917, upload-time = "2026-01-21T18:44:54.064Z" }, - { url = "https://files.pythonhosted.org/packages/24/38/805904b911857f2b5e00fdea44e9570df62110f834378706939825579296/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5", size = 3185790, upload-time = "2026-01-21T18:32:30.581Z" }, - { url = "https://files.pythonhosted.org/packages/69/4f/3260bb53aabd2d274856337456ea52f6a7eccf6cce208e558f870cec766b/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e", size = 3207206, upload-time = "2026-01-21T18:44:55.93Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b3/67c432d7f9d88bb1a61909b67e29f6354d59186c168fb5d381cf438d3b73/sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = "sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047", size = 2115296, upload-time = "2026-01-21T18:33:12.498Z" }, - { url = "https://files.pythonhosted.org/packages/4a/8c/25fb284f570f9d48e6c240f0269a50cec9cf009a7e08be4c0aaaf0654972/sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061", size = 2138540, upload-time = "2026-01-21T18:33:14.22Z" }, - { url = "https://files.pythonhosted.org/packages/69/ac/b42ad16800d0885105b59380ad69aad0cce5a65276e269ce2729a2343b6a/sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684", size = 2154851, upload-time = "2026-01-21T18:27:30.54Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/d8710068cb79f64d002ebed62a7263c00c8fd95f4ebd4b5be8f7ca93f2bc/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62", 
size = 3311241, upload-time = "2026-01-21T18:32:33.45Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0f/20c71487c7219ab3aa7421c7c62d93824c97c1460f2e8bb72404b0192d13/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f", size = 3310741, upload-time = "2026-01-21T18:44:57.887Z" }, - { url = "https://files.pythonhosted.org/packages/65/80/d26d00b3b249ae000eee4db206fcfc564bf6ca5030e4747adf451f4b5108/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01", size = 3263116, upload-time = "2026-01-21T18:32:35.044Z" }, - { url = "https://files.pythonhosted.org/packages/da/ee/74dda7506640923821340541e8e45bd3edd8df78664f1f2e0aae8077192b/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999", size = 3285327, upload-time = "2026-01-21T18:44:59.254Z" }, - { url = "https://files.pythonhosted.org/packages/9f/25/6dcf8abafff1389a21c7185364de145107b7394ecdcb05233815b236330d/sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d", size = 2114564, upload-time = "2026-01-21T18:33:15.85Z" }, - { url = "https://files.pythonhosted.org/packages/93/5f/e081490f8523adc0088f777e4ebad3cac21e498ec8a3d4067074e21447a1/sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597", size = 2139233, upload-time = "2026-01-21T18:33:17.528Z" }, - { url = "https://files.pythonhosted.org/packages/b6/35/d16bfa235c8b7caba3730bba43e20b1e376d2224f407c178fbf59559f23e/sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c", size = 2153405, upload-time = "2026-01-21T19:05:54.143Z" }, - { url = 
"https://files.pythonhosted.org/packages/06/6c/3192e24486749862f495ddc6584ed730c0c994a67550ec395d872a2ad650/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9", size = 3334702, upload-time = "2026-01-21T18:46:45.384Z" }, - { url = "https://files.pythonhosted.org/packages/ea/a2/b9f33c8d68a3747d972a0bb758c6b63691f8fb8a49014bc3379ba15d4274/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b", size = 3347664, upload-time = "2026-01-21T18:40:09.979Z" }, - { url = "https://files.pythonhosted.org/packages/aa/d2/3e59e2a91eaec9db7e8dc6b37b91489b5caeb054f670f32c95bcba98940f/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53", size = 3277372, upload-time = "2026-01-21T18:46:47.168Z" }, - { url = "https://files.pythonhosted.org/packages/dd/dd/67bc2e368b524e2192c3927b423798deda72c003e73a1e94c21e74b20a85/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e", size = 3312425, upload-time = "2026-01-21T18:40:11.548Z" }, - { url = "https://files.pythonhosted.org/packages/43/82/0ecd68e172bfe62247e96cb47867c2d68752566811a4e8c9d8f6e7c38a65/sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb", size = 2113155, upload-time = "2026-01-21T18:42:49.748Z" }, - { url = "https://files.pythonhosted.org/packages/bc/2a/2821a45742073fc0331dc132552b30de68ba9563230853437cac54b2b53e/sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff", size = 2140078, upload-time = "2026-01-21T18:42:51.197Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/4b/fa7838fe20bb752810feed60e45625a9a8b0102c0c09971e2d1d95362992/sqlalchemy-2.0.46-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:93a12da97cca70cea10d4b4fc602589c4511f96c1f8f6c11817620c021d21d00", size = 2150268, upload-time = "2026-01-21T19:05:56.621Z" }, - { url = "https://files.pythonhosted.org/packages/46/c1/b34dccd712e8ea846edf396e00973dda82d598cb93762e55e43e6835eba9/sqlalchemy-2.0.46-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af865c18752d416798dae13f83f38927c52f085c52e2f32b8ab0fef46fdd02c2", size = 3276511, upload-time = "2026-01-21T18:46:49.022Z" }, - { url = "https://files.pythonhosted.org/packages/96/48/a04d9c94753e5d5d096c628c82a98c4793b9c08ca0e7155c3eb7d7db9f24/sqlalchemy-2.0.46-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8d679b5f318423eacb61f933a9a0f75535bfca7056daeadbf6bd5bcee6183aee", size = 3292881, upload-time = "2026-01-21T18:40:13.089Z" }, - { url = "https://files.pythonhosted.org/packages/be/f4/06eda6e91476f90a7d8058f74311cb65a2fb68d988171aced81707189131/sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64901e08c33462acc9ec3bad27fc7a5c2b6491665f2aa57564e57a4f5d7c52ad", size = 3224559, upload-time = "2026-01-21T18:46:50.974Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a2/d2af04095412ca6345ac22b33b89fe8d6f32a481e613ffcb2377d931d8d0/sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8ac45e8f4eaac0f9f8043ea0e224158855c6a4329fd4ee37c45c61e3beb518e", size = 3262728, upload-time = "2026-01-21T18:40:14.883Z" }, - { url = "https://files.pythonhosted.org/packages/31/48/1980c7caa5978a3b8225b4d230e69a2a6538a3562b8b31cea679b6933c83/sqlalchemy-2.0.46-cp313-cp313-win32.whl", hash = "sha256:8d3b44b3d0ab2f1319d71d9863d76eeb46766f8cf9e921ac293511804d39813f", size = 2111295, upload-time = "2026-01-21T18:42:52.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/54/f8d65bbde3d877617c4720f3c9f60e99bb7266df0d5d78b6e25e7c149f35/sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl", hash = "sha256:77f8071d8fbcbb2dd11b7fd40dedd04e8ebe2eb80497916efedba844298065ef", size = 2137076, upload-time = "2026-01-21T18:42:53.924Z" }, - { url = "https://files.pythonhosted.org/packages/56/ba/9be4f97c7eb2b9d5544f2624adfc2853e796ed51d2bb8aec90bc94b7137e/sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1e8cc6cc01da346dc92d9509a63033b9b1bda4fed7a7a7807ed385c7dccdc10", size = 3556533, upload-time = "2026-01-21T18:33:06.636Z" }, - { url = "https://files.pythonhosted.org/packages/20/a6/b1fc6634564dbb4415b7ed6419cdfeaadefd2c39cdab1e3aa07a5f2474c2/sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96c7cca1a4babaaf3bfff3e4e606e38578856917e52f0384635a95b226c87764", size = 3523208, upload-time = "2026-01-21T18:45:08.436Z" }, - { url = "https://files.pythonhosted.org/packages/a1/d8/41e0bdfc0f930ff236f86fccd12962d8fa03713f17ed57332d38af6a3782/sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2a9f9aee38039cf4755891a1e50e1effcc42ea6ba053743f452c372c3152b1b", size = 3464292, upload-time = "2026-01-21T18:33:08.208Z" }, - { url = "https://files.pythonhosted.org/packages/f0/8b/9dcbec62d95bea85f5ecad9b8d65b78cc30fb0ffceeb3597961f3712549b/sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:db23b1bf8cfe1f7fda19018e7207b20cdb5168f83c437ff7e95d19e39289c447", size = 3473497, upload-time = "2026-01-21T18:45:10.552Z" }, - { url = "https://files.pythonhosted.org/packages/e9/f8/5ecdfc73383ec496de038ed1614de9e740a82db9ad67e6e4514ebc0708a3/sqlalchemy-2.0.46-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:56bdd261bfd0895452006d5316cbf35739c53b9bb71a170a331fa0ea560b2ada", size = 2152079, upload-time = "2026-01-21T19:05:58.477Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/bf/eba3036be7663ce4d9c050bc3d63794dc29fbe01691f2bf5ccb64e048d20/sqlalchemy-2.0.46-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33e462154edb9493f6c3ad2125931e273bbd0be8ae53f3ecd1c161ea9a1dd366", size = 3272216, upload-time = "2026-01-21T18:46:52.634Z" }, - { url = "https://files.pythonhosted.org/packages/05/45/1256fb597bb83b58a01ddb600c59fe6fdf0e5afe333f0456ed75c0f8d7bd/sqlalchemy-2.0.46-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bcdce05f056622a632f1d44bb47dbdb677f58cad393612280406ce37530eb6d", size = 3277208, upload-time = "2026-01-21T18:40:16.38Z" }, - { url = "https://files.pythonhosted.org/packages/d9/a0/2053b39e4e63b5d7ceb3372cface0859a067c1ddbd575ea7e9985716f771/sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e84b09a9b0f19accedcbeff5c2caf36e0dd537341a33aad8d680336152dc34e", size = 3221994, upload-time = "2026-01-21T18:46:54.622Z" }, - { url = "https://files.pythonhosted.org/packages/1e/87/97713497d9502553c68f105a1cb62786ba1ee91dea3852ae4067ed956a50/sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4f52f7291a92381e9b4de9050b0a65ce5d6a763333406861e33906b8aa4906bf", size = 3243990, upload-time = "2026-01-21T18:40:18.253Z" }, - { url = "https://files.pythonhosted.org/packages/a8/87/5d1b23548f420ff823c236f8bea36b1a997250fd2f892e44a3838ca424f4/sqlalchemy-2.0.46-cp314-cp314-win32.whl", hash = "sha256:70ed2830b169a9960193f4d4322d22be5c0925357d82cbf485b3369893350908", size = 2114215, upload-time = "2026-01-21T18:42:55.232Z" }, - { url = "https://files.pythonhosted.org/packages/3a/20/555f39cbcf0c10cf452988b6a93c2a12495035f68b3dbd1a408531049d31/sqlalchemy-2.0.46-cp314-cp314-win_amd64.whl", hash = "sha256:3c32e993bc57be6d177f7d5d31edb93f30726d798ad86ff9066d75d9bf2e0b6b", size = 2139867, upload-time = "2026-01-21T18:42:56.474Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/f0/f96c8057c982d9d8a7a68f45d69c674bc6f78cad401099692fe16521640a/sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4dafb537740eef640c4d6a7c254611dca2df87eaf6d14d6a5fca9d1f4c3fc0fa", size = 3561202, upload-time = "2026-01-21T18:33:10.337Z" }, - { url = "https://files.pythonhosted.org/packages/d7/53/3b37dda0a5b137f21ef608d8dfc77b08477bab0fe2ac9d3e0a66eaeab6fc/sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42a1643dc5427b69aca967dae540a90b0fbf57eaf248f13a90ea5930e0966863", size = 3526296, upload-time = "2026-01-21T18:45:12.657Z" }, - { url = "https://files.pythonhosted.org/packages/33/75/f28622ba6dde79cd545055ea7bd4062dc934e0621f7b3be2891f8563f8de/sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ff33c6e6ad006bbc0f34f5faf941cfc62c45841c64c0a058ac38c799f15b5ede", size = 3470008, upload-time = "2026-01-21T18:33:11.725Z" }, - { url = "https://files.pythonhosted.org/packages/a9/42/4afecbbc38d5e99b18acef446453c76eec6fbd03db0a457a12a056836e22/sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:82ec52100ec1e6ec671563bbd02d7c7c8d0b9e71a0723c72f22ecf52d1755330", size = 3476137, upload-time = "2026-01-21T18:45:15.001Z" }, - { url = "https://files.pythonhosted.org/packages/fc/a1/9c4efa03300926601c19c18582531b45aededfb961ab3c3585f1e24f120b/sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e", size = 1937882, upload-time = "2026-01-21T18:22:10.456Z" }, + { url = "https://files.pythonhosted.org/packages/ec/75/17db77c57129c223c7d98518ad1e1faa24ee350c22a44b55390d8463c28c/sqlalchemy-2.0.47-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33a917ede39406ddb93c3e642b5bc480be7c5fd0f3d0d6ae1036d466fb963f1a", size = 2157331, upload-time = "2026-02-24T16:43:52.693Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/d6/3658f7e5c376de774c009f2bb9c0ddf88a35b89c5bfb15ee7174a17b1a5f/sqlalchemy-2.0.47-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:561d027c829b01e040bdade6b6f5b429249d056ef95d7bdcb9211539ecc82803", size = 3236939, upload-time = "2026-02-24T17:28:57.419Z" }, + { url = "https://files.pythonhosted.org/packages/4e/38/f4b94f85d1c26cb9ee0e57449754de816c326f9586b9a8c5247eb49146de/sqlalchemy-2.0.47-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fa5072a37e68c565363c009b7afa5b199b488c87940ec02719860093a08f34ca", size = 3235190, upload-time = "2026-02-24T17:27:07.884Z" }, + { url = "https://files.pythonhosted.org/packages/94/f2/36714f1de01e135a2bf142b662e416e5338ab63c47878e31051338c66e2d/sqlalchemy-2.0.47-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1e7ed17dd4312a298b6024bfd1baf51654bc49e3f03c798005babf0c7922d6a7", size = 3188064, upload-time = "2026-02-24T17:28:58.908Z" }, + { url = "https://files.pythonhosted.org/packages/ab/94/fcd978e7625cd1c97d9f1d7363e18e37d24314e572acd7c091e3a4210106/sqlalchemy-2.0.47-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6992e353fcb0593eb42d95ad84b3e58fe40b5e37fd332b9ccba28f4b2f36d1fc", size = 3209480, upload-time = "2026-02-24T17:27:09.823Z" }, + { url = "https://files.pythonhosted.org/packages/23/29/c633202b9900ab65f0162f59df737b57f30010f44d892b186810c9ed58b7/sqlalchemy-2.0.47-cp310-cp310-win32.whl", hash = "sha256:05a6d58ed99ebd01303c92d29a0c9cbf70f637b3ddd155f5172c5a7239940998", size = 2117652, upload-time = "2026-02-24T17:14:34.635Z" }, + { url = "https://files.pythonhosted.org/packages/00/39/54acf13913932b8508058d47a169e6fcde9adaa4cbfa16cbf30da1f6a482/sqlalchemy-2.0.47-cp310-cp310-win_amd64.whl", hash = "sha256:4a7aa4a584cc97e268c11e700dea0b763874eaebb435e75e7d0ffee5d90f5030", size = 2140883, upload-time = "2026-02-24T17:14:35.875Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/13/886338d3e8ab5ddcfe84d54302c749b1793e16c4bba63d7004e3f7baa8ec/sqlalchemy-2.0.47-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a1dbf0913879c443617d6b64403cf2801c941651db8c60e96d204ed9388d6b0", size = 2157124, upload-time = "2026-02-24T16:43:54.706Z" }, + { url = "https://files.pythonhosted.org/packages/b6/bb/a897f6a66c9986aa9f27f5cf8550637d8a5ea368fd7fb42f6dac3105b4dc/sqlalchemy-2.0.47-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:775effbb97ea3b00c4dd3aeaf3ba8acba6e3e2b4b41d17d67a27e696843dbc95", size = 3313513, upload-time = "2026-02-24T17:29:00.527Z" }, + { url = "https://files.pythonhosted.org/packages/59/fb/69bfae022b681507565ab0d34f0c80aa1e9f954a5a7cbfb0ed054966ac8d/sqlalchemy-2.0.47-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56cc834a3ffac34270cc2a41875e0f40e97aa651f4f3ca1cfbbf421c044cb62b", size = 3313014, upload-time = "2026-02-24T17:27:11.679Z" }, + { url = "https://files.pythonhosted.org/packages/04/f3/0eba329f7c182d53205a228c4fd24651b95489b431ea2bd830887b4c13c4/sqlalchemy-2.0.47-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:49b5e0c7244262f39e767c018e4fdb5e5dbc23cd54c5ddac8eea8f0ba32ef890", size = 3265389, upload-time = "2026-02-24T17:29:02.497Z" }, + { url = "https://files.pythonhosted.org/packages/5c/06/654edc084b3b46ac79e04200d7c46467ae80c759c4ee41c897f9272b036f/sqlalchemy-2.0.47-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:15cd822a3f1f6f77b5b841a30c1a07a07f7dee3385f17e638e1722de9ab683be", size = 3287604, upload-time = "2026-02-24T17:27:13.295Z" }, + { url = "https://files.pythonhosted.org/packages/78/33/c18c8f63b61981219d3aa12321bb7ccee605034d195e868ed94f9727b27c/sqlalchemy-2.0.47-cp311-cp311-win32.whl", hash = "sha256:9847a19548cd283a65e1ce0afd54016598d55ff72682d6fd3e493af6fc044064", size = 2116916, upload-time = "2026-02-24T17:14:37.392Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/c6/a59e3f9796fff844e16afbd821db9abfd6e12698db9441a231a96193a100/sqlalchemy-2.0.47-cp311-cp311-win_amd64.whl", hash = "sha256:722abf1c82aeca46a1a0803711244a48a298279eeaec9e02f7bfee9e064182e5", size = 2141587, upload-time = "2026-02-24T17:14:39.746Z" }, + { url = "https://files.pythonhosted.org/packages/80/88/74eb470223ff88ea6572a132c0b8de8c1d8ed7b843d3b44a8a3c77f31d39/sqlalchemy-2.0.47-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4fa91b19d6b9821c04cc8f7aa2476429cc8887b9687c762815aa629f5c0edec1", size = 2155687, upload-time = "2026-02-24T17:05:46.451Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ba/1447d3d558971b036cb93b557595cb5dcdfe728f1c7ac4dec16505ef5756/sqlalchemy-2.0.47-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c5bbbd14eff577c8c79cbfe39a0771eecd20f430f3678533476f0087138f356", size = 3336978, upload-time = "2026-02-24T17:18:04.597Z" }, + { url = "https://files.pythonhosted.org/packages/8a/07/b47472d2ffd0776826f17ccf0b4d01b224c99fbd1904aeb103dffbb4b1cc/sqlalchemy-2.0.47-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a6c555da8d4280a3c4c78c5b7a3f990cee2b2884e5f934f87a226191682ff7", size = 3349939, upload-time = "2026-02-24T17:27:18.937Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c6/95fa32b79b57769da3e16f054cf658d90940317b5ca0ec20eac84aa19c4f/sqlalchemy-2.0.47-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ed48a1701d24dff3bb49a5bce94d6bc84cbe33d98af2aa2d3cdcce3dea1709ec", size = 3279648, upload-time = "2026-02-24T17:18:07.038Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c8/3d07e7c73928dc59a0bed40961ca4e313e797bce650b088e8d5fdd3ad939/sqlalchemy-2.0.47-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4f3178c920ad98158f0b6309382194df04b14808fa6052ae07099fdde29d5602", size = 3314695, upload-time = "2026-02-24T17:27:20.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/d2/ed32b1611c1e19fdb028eee1adc5a9aa138c2952d09ae11f1670170f80ae/sqlalchemy-2.0.47-cp312-cp312-win32.whl", hash = "sha256:b9c11ac9934dd59ece9619fe42780a08abe2faab7b0543bb00d5eabea4f421b9", size = 2115502, upload-time = "2026-02-24T17:22:52.546Z" }, + { url = "https://files.pythonhosted.org/packages/fd/52/9de590356a4dd8e9ef5a881dbba64b2bbc4cbc71bf02bc68e775fb9b1899/sqlalchemy-2.0.47-cp312-cp312-win_amd64.whl", hash = "sha256:db43b72cf8274a99e089755c9c1e0b947159b71adbc2c83c3de2e38d5d607acb", size = 2142435, upload-time = "2026-02-24T17:22:54.268Z" }, + { url = "https://files.pythonhosted.org/packages/4a/e5/0af64ce7d8f60ec5328c10084e2f449e7912a9b8bdbefdcfb44454a25f49/sqlalchemy-2.0.47-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:456a135b790da5d3c6b53d0ef71ac7b7d280b7f41eb0c438986352bf03ca7143", size = 2152551, upload-time = "2026-02-24T17:05:47.675Z" }, + { url = "https://files.pythonhosted.org/packages/63/79/746b8d15f6940e2ac469ce22d7aa5b1124b1ab820bad9b046eb3000c88a6/sqlalchemy-2.0.47-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09a2f7698e44b3135433387da5d8846cf7cc7c10e5425af7c05fee609df978b6", size = 3278782, upload-time = "2026-02-24T17:18:10.012Z" }, + { url = "https://files.pythonhosted.org/packages/91/b1/bd793ddb34345d1ed43b13ab2d88c95d7d4eb2e28f5b5a99128b9cc2bca2/sqlalchemy-2.0.47-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bbc72e6a177c78d724f9106aaddc0d26a2ada89c6332b5935414eccf04cbd5", size = 3295155, upload-time = "2026-02-24T17:27:22.827Z" }, + { url = "https://files.pythonhosted.org/packages/97/84/7213def33f94e5ca6f5718d259bc9f29de0363134648425aa218d4356b23/sqlalchemy-2.0.47-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:75460456b043b78b6006e41bdf5b86747ee42eafaf7fffa3b24a6e9a456a2092", size = 3226834, upload-time = "2026-02-24T17:18:11.465Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/06/456810204f4dc29b5f025b1b0a03b4bd6b600ebf3c1040aebd90a257fa33/sqlalchemy-2.0.47-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5d9adaa616c3bc7d80f9ded57cd84b51d6617cad6a5456621d858c9f23aaee01", size = 3265001, upload-time = "2026-02-24T17:27:24.813Z" }, + { url = "https://files.pythonhosted.org/packages/fb/20/df3920a4b2217dbd7390a5bd277c1902e0393f42baaf49f49b3c935e7328/sqlalchemy-2.0.47-cp313-cp313-win32.whl", hash = "sha256:76e09f974382a496a5ed985db9343628b1cb1ac911f27342e4cc46a8bac10476", size = 2113647, upload-time = "2026-02-24T17:22:55.747Z" }, + { url = "https://files.pythonhosted.org/packages/46/06/7873ddf69918efbfabd7211829f4bd8019739d0a719253112d305d3ba51d/sqlalchemy-2.0.47-cp313-cp313-win_amd64.whl", hash = "sha256:0664089b0bf6724a0bfb49a0cf4d4da24868a0a5c8e937cd7db356d5dcdf2c66", size = 2139425, upload-time = "2026-02-24T17:22:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/54/fa/61ad9731370c90ac7ea5bf8f5eaa12c48bb4beec41c0fa0360becf4ac10d/sqlalchemy-2.0.47-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed0c967c701ae13da98eb220f9ddab3044ab63504c1ba24ad6a59b26826ad003", size = 3558809, upload-time = "2026-02-24T17:12:15.232Z" }, + { url = "https://files.pythonhosted.org/packages/33/d5/221fac96f0529391fe374875633804c866f2b21a9c6d3a6ca57d9c12cfd7/sqlalchemy-2.0.47-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3537943a61fd25b241e976426a0c6814434b93cf9b09d39e8e78f3c9eb9a487", size = 3525480, upload-time = "2026-02-24T17:27:59.602Z" }, + { url = "https://files.pythonhosted.org/packages/ec/55/8247d53998c3673e4a8d1958eba75c6f5cc3b39082029d400bb1f2a911ae/sqlalchemy-2.0.47-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:57f7e336a64a0dba686c66392d46b9bc7af2c57d55ce6dc1697b4ef32b043ceb", size = 3466569, upload-time = "2026-02-24T17:12:16.94Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/b5/c1f0eea1bac6790845f71420a7fe2f2a0566203aa57543117d4af3b77d1c/sqlalchemy-2.0.47-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dff735a621858680217cb5142b779bad40ef7322ddbb7c12062190db6879772e", size = 3475770, upload-time = "2026-02-24T17:28:02.034Z" }, + { url = "https://files.pythonhosted.org/packages/c5/ed/2f43f92474ea0c43c204657dc47d9d002cd738b96ca2af8e6d29a9b5e42d/sqlalchemy-2.0.47-cp313-cp313t-win32.whl", hash = "sha256:3893dc096bb3cca9608ea3487372ffcea3ae9b162f40e4d3c51dd49db1d1b2dc", size = 2141300, upload-time = "2026-02-24T17:14:37.024Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a9/8b73f9f1695b6e92f7aaf1711135a1e3bbeb78bca9eded35cb79180d3c6d/sqlalchemy-2.0.47-cp313-cp313t-win_amd64.whl", hash = "sha256:b5103427466f4b3e61f04833ae01f9a914b1280a2a8bcde3a9d7ab11f3755b42", size = 2173053, upload-time = "2026-02-24T17:14:38.688Z" }, + { url = "https://files.pythonhosted.org/packages/c1/30/98243209aae58ed80e090ea988d5182244ca7ab3ff59e6d850c3dfc7651e/sqlalchemy-2.0.47-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b03010a5a5dfe71676bc83f2473ebe082478e32d77e6f082c8fe15a31c3b42a6", size = 2154355, upload-time = "2026-02-24T17:05:48.959Z" }, + { url = "https://files.pythonhosted.org/packages/ab/62/12ca6ea92055fe486d6558a2a4efe93e194ff597463849c01f88e5adb99d/sqlalchemy-2.0.47-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f8e3371aa9024520883a415a09cc20c33cfd3eeccf9e0f4f4c367f940b9cbd44", size = 3274486, upload-time = "2026-02-24T17:18:13.659Z" }, + { url = "https://files.pythonhosted.org/packages/97/88/7dfbdeaa8d42b1584e65d6cc713e9d33b6fa563e0d546d5cb87e545bb0e5/sqlalchemy-2.0.47-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9449f747e50d518c6e1b40cc379e48bfc796453c47b15e627ea901c201e48a6", size = 3279481, upload-time = "2026-02-24T17:27:26.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/b7/75e1c1970616a9dd64a8a6fd788248da2ddaf81c95f4875f2a1e8aee4128/sqlalchemy-2.0.47-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:21410f60d5cac1d6bfe360e05bd91b179be4fa0aa6eea6be46054971d277608f", size = 3224269, upload-time = "2026-02-24T17:18:15.078Z" }, + { url = "https://files.pythonhosted.org/packages/31/ac/eec1a13b891df9a8bc203334caf6e6aac60b02f61b018ef3b4124b8c4120/sqlalchemy-2.0.47-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:819841dd5bb4324c284c09e2874cf96fe6338bfb57a64548d9b81a4e39c9871f", size = 3246262, upload-time = "2026-02-24T17:27:27.986Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b0/661b0245b06421058610da39f8ceb34abcc90b49f90f256380968d761dbe/sqlalchemy-2.0.47-cp314-cp314-win32.whl", hash = "sha256:e255ee44821a7ef45649c43064cf94e74f81f61b4df70547304b97a351e9b7db", size = 2116528, upload-time = "2026-02-24T17:22:59.363Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ef/1035a90d899e61810791c052004958be622a2cf3eb3df71c3fe20778c5d0/sqlalchemy-2.0.47-cp314-cp314-win_amd64.whl", hash = "sha256:209467ff73ea1518fe1a5aaed9ba75bb9e33b2666e2553af9ccd13387bf192cb", size = 2142181, upload-time = "2026-02-24T17:23:01.001Z" }, + { url = "https://files.pythonhosted.org/packages/76/bb/17a1dd09cbba91258218ceb582225f14b5364d2683f9f5a274f72f2d764f/sqlalchemy-2.0.47-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e78fd9186946afaa287f8a1fe147ead06e5d566b08c0afcb601226e9c7322a64", size = 3563477, upload-time = "2026-02-24T17:12:18.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/8f/1a03d24c40cc321ef2f2231f05420d140bb06a84f7047eaa7eaa21d230ba/sqlalchemy-2.0.47-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5740e2f31b5987ed9619d6912ae5b750c03637f2078850da3002934c9532f172", size = 3528568, upload-time = "2026-02-24T17:28:03.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/53/d56a213055d6b038a5384f0db5ece7343334aca230ff3f0fa1561106f22c/sqlalchemy-2.0.47-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb9ac00d03de93acb210e8ec7243fefe3e012515bf5fd2f0898c8dff38bc77a4", size = 3472284, upload-time = "2026-02-24T17:12:20.319Z" }, + { url = "https://files.pythonhosted.org/packages/ff/19/c235d81b9cfdd6130bf63143b7bade0dc4afa46c4b634d5d6b2a96bea233/sqlalchemy-2.0.47-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c72a0b9eb2672d70d112cb149fbaf172d466bc691014c496aaac594f1988e706", size = 3478410, upload-time = "2026-02-24T17:28:05.892Z" }, + { url = "https://files.pythonhosted.org/packages/0e/db/cafdeca5ecdaa3bb0811ba5449501da677ce0d83be8d05c5822da72d2e86/sqlalchemy-2.0.47-cp314-cp314t-win32.whl", hash = "sha256:c200db1128d72a71dc3c31c24b42eb9fd85b2b3e5a3c9ba1e751c11ac31250ff", size = 2147164, upload-time = "2026-02-24T17:14:40.783Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5e/ff41a010e9e0f76418b02ad352060a4341bb15f0af66cedc924ab376c7c6/sqlalchemy-2.0.47-cp314-cp314t-win_amd64.whl", hash = "sha256:669837759b84e575407355dcff912835892058aea9b80bd1cb76d6a151cf37f7", size = 2182154, upload-time = "2026-02-24T17:14:43.205Z" }, + { url = "https://files.pythonhosted.org/packages/15/9f/7c378406b592fcf1fc157248607b495a40e3202ba4a6f1372a2ba6447717/sqlalchemy-2.0.47-py3-none-any.whl", hash = "sha256:e2647043599297a1ef10e720cf310846b7f31b6c841fee093d2b09d81215eb93", size = 1940159, upload-time = "2026-02-24T17:15:07.158Z" }, ] [[package]] name = "sse-starlette" -version = "3.2.0" +version = "3.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = 
"2026-01-17T13:11:05.62Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/9f/c3695c2d2d4ef70072c3a06992850498b01c6bc9be531950813716b426fa/sse_starlette-3.3.2.tar.gz", hash = "sha256:678fca55a1945c734d8472a6cad186a55ab02840b4f6786f5ee8770970579dcd", size = 32326, upload-time = "2026-02-28T11:24:34.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, + { url = "https://files.pythonhosted.org/packages/61/28/8cb142d3fe80c4a2d8af54ca0b003f47ce0ba920974e7990fa6e016402d1/sse_starlette-3.3.2-py3-none-any.whl", hash = "sha256:5c3ea3dad425c601236726af2f27689b74494643f57017cafcb6f8c9acfbb862", size = 14270, upload-time = "2026-02-28T11:24:32.984Z" }, ] [[package]] @@ -6991,7 +7115,7 @@ wheels = [ [[package]] name = "strands-agents" -version = "1.27.0" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3" }, @@ -7006,9 +7130,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "watchdog" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/54/bf0910a1c40feacaedcf5d30840be990eabd09eff5375fa40525ba530c8d/strands_agents-1.27.0.tar.gz", hash = "sha256:84d0b670e534d7c281104a22035c10de8d43e9ad8ee589bde16f54a8387b2c56", size = 712878, upload-time = "2026-02-19T17:18:23.327Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/27/9c1c114a83844f9e27fe0312bfdad27c753b922f123512669997f8af47e3/strands_agents-1.28.0.tar.gz", hash = "sha256:0372d8f75d694f3230b0035867455ef31c74f6d9c708985e41f646a1a0b29f7e", size = 717116, upload-time = "2026-02-25T19:36:46.959Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/ca/d5c269f83929bdc753dce3c6091a1671e50268769b0ace009264424bf165/strands_agents-1.27.0-py3-none-any.whl", hash = 
"sha256:d9012515a7b4f324a600cacc539e837a51b3f7fe21da7efe1764186ade3be498", size = 351988, upload-time = "2026-02-19T17:18:19Z" }, + { url = "https://files.pythonhosted.org/packages/b3/98/f4f87500251f1cab2bd9a0d271852d6d8796635ea8d287b5c51a12316d58/strands_agents-1.28.0-py3-none-any.whl", hash = "sha256:e4c238811949b4f8d31ea9df03a74a57afa5f1728a23bf1ddbf8703f34addc6b", size = 355636, upload-time = "2026-02-25T19:36:45.075Z" }, ] [[package]] @@ -7113,7 +7237,7 @@ wheels = [ [[package]] name = "timm" -version = "1.0.24" +version = "1.0.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -7122,9 +7246,9 @@ dependencies = [ { name = "torch" }, { name = "torchvision" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/9d/0ea45640be447445c8664ce2b10c74f763b0b0b9ed11620d41a4d4baa10c/timm-1.0.24.tar.gz", hash = "sha256:c7b909f43fe2ef8fe62c505e270cd4f1af230dfbc37f2ee93e3608492b9d9a40", size = 2412239, upload-time = "2026-01-07T00:26:17.541Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/2c/593109822fe735e637382aca6640c1102c19797f7791f1fd1dab2d6c3cb1/timm-1.0.25.tar.gz", hash = "sha256:47f59fc2754725735cc81bb83bcbfce5bec4ebd5d4bb9e69da57daa92fcfa768", size = 2414743, upload-time = "2026-02-23T16:49:00.137Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/dd/c1f5b0890f7b5db661bde0864b41cb0275be76851047e5f7e085fe0b455a/timm-1.0.24-py3-none-any.whl", hash = "sha256:8301ac783410c6ad72c73c49326af6d71a9e4d1558238552796e825c2464913f", size = 2560563, upload-time = "2026-01-07T00:26:13.956Z" }, + { url = "https://files.pythonhosted.org/packages/ef/50/de09f69a74278a16f08f1d562047a2d6713783765ee3c6971881a2b21a3f/timm-1.0.25-py3-none-any.whl", hash = "sha256:bef7f61dd717cb2dbbb7e326f143e13d660a47ecbd84116e6fe33732bed5c484", size = 2565837, upload-time = "2026-02-23T16:48:58.324Z" }, ] [[package]] @@ -7329,7 +7453,8 @@ name = "torchvision" version = "0.25.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pillow" }, { name = "torch" }, ] @@ -7397,7 +7522,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "huggingface-hub" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -7427,7 +7553,7 @@ wheels = [ [[package]] name = "typer" -version = "0.24.0" +version = "0.24.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -7435,18 +7561,18 @@ dependencies = [ { name = "rich" }, { name = "shellingham" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/b6/3e681d3b6bb22647509bdbfdd18055d5adc0dce5c5585359fa46ff805fdc/typer-0.24.0.tar.gz", hash = "sha256:f9373dc4eff901350694f519f783c29b6d7a110fc0dcc11b1d7e353b85ca6504", size = 118380, upload-time = "2026-02-16T22:08:48.496Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/d0/4da85c2a45054bb661993c93524138ace4956cb075a7ae0c9d1deadc331b/typer-0.24.0-py3-none-any.whl", hash = "sha256:5fc435a9c8356f6160ed6e85a6301fdd6e3d8b2851da502050d1f92c5e9eddc8", size = 56441, upload-time 
= "2026-02-16T22:08:47.535Z" }, + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, ] [[package]] name = "types-protobuf" -version = "6.32.1.20251210" +version = "6.32.1.20260221" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c2/59/c743a842911887cd96d56aa8936522b0cd5f7a7f228c96e81b59fced45be/types_protobuf-6.32.1.20251210.tar.gz", hash = "sha256:c698bb3f020274b1a2798ae09dc773728ce3f75209a35187bd11916ebfde6763", size = 63900, upload-time = "2025-12-10T03:14:25.451Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/e2/9aa4a3b2469508bd7b4e2ae11cbedaf419222a09a1b94daffcd5efca4023/types_protobuf-6.32.1.20260221.tar.gz", hash = "sha256:6d5fb060a616bfb076cbb61b4b3c3969f5fc8bec5810f9a2f7e648ee5cbcbf6e", size = 64408, upload-time = "2026-02-21T03:55:13.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/43/58e75bac4219cbafee83179505ff44cae3153ec279be0e30583a73b8f108/types_protobuf-6.32.1.20251210-py3-none-any.whl", hash = "sha256:2641f78f3696822a048cfb8d0ff42ccd85c25f12f871fbebe86da63793692140", size = 77921, upload-time = "2025-12-10T03:14:24.477Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e8/1fd38926f9cf031188fbc5a96694203ea6f24b0e34bd64a225ec6f6291ba/types_protobuf-6.32.1.20260221-py3-none-any.whl", hash = "sha256:da7cdd947975964a93c30bfbcc2c6841ee646b318d3816b033adc2c4eb6448e4", size = 77956, upload-time = "2026-02-21T03:55:12.894Z" }, ] [[package]] @@ -7583,31 +7709,31 @@ wheels = [ [[package]] name = "uuid-utils" -version = "0.14.0" +version = "0.14.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/57/7c/3a926e847516e67bc6838634f2e54e24381105b4e80f9338dc35cca0086b/uuid_utils-0.14.0.tar.gz", hash = "sha256:fc5bac21e9933ea6c590433c11aa54aaca599f690c08069e364eb13a12f670b4", size = 22072, upload-time = "2026-01-20T20:37:15.729Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/d1/38a573f0c631c062cf42fa1f5d021d4dd3c31fb23e4376e4b56b0c9fbbed/uuid_utils-0.14.1.tar.gz", hash = "sha256:9bfc95f64af80ccf129c604fb6b8ca66c6f256451e32bc4570f760e4309c9b69", size = 22195, upload-time = "2026-02-20T22:50:38.833Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/42/42d003f4a99ddc901eef2fd41acb3694163835e037fb6dde79ad68a72342/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f6695c0bed8b18a904321e115afe73b34444bc8451d0ce3244a1ec3b84deb0e5", size = 601786, upload-time = "2026-01-20T20:37:09.843Z" }, - { url = "https://files.pythonhosted.org/packages/96/e6/775dfb91f74b18f7207e3201eb31ee666d286579990dc69dd50db2d92813/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4f0a730bbf2d8bb2c11b93e1005e91769f2f533fa1125ed1f00fd15b6fcc732b", size = 303943, upload-time = "2026-01-20T20:37:18.767Z" }, - { url = "https://files.pythonhosted.org/packages/17/82/ea5f5e85560b08a1f30cdc65f75e76494dc7aba9773f679e7eaa27370229/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40ce3fd1a4fdedae618fc3edc8faf91897012469169d600133470f49fd699ed3", size = 340467, upload-time = "2026-01-20T20:37:11.794Z" }, - { url = "https://files.pythonhosted.org/packages/ca/33/54b06415767f4569882e99b6470c6c8eeb97422686a6d432464f9967fd91/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:09ae4a98416a440e78f7d9543d11b11cae4bab538b7ed94ec5da5221481748f2", size = 346333, upload-time = "2026-01-20T20:37:12.818Z" }, - { url = 
"https://files.pythonhosted.org/packages/cb/10/a6bce636b8f95e65dc84bf4a58ce8205b8e0a2a300a38cdbc83a3f763d27/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:971e8c26b90d8ae727e7f2ac3ee23e265971d448b3672882f2eb44828b2b8c3e", size = 470859, upload-time = "2026-01-20T20:37:01.512Z" }, - { url = "https://files.pythonhosted.org/packages/8a/27/84121c51ea72f013f0e03d0886bcdfa96b31c9b83c98300a7bd5cc4fa191/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5cde1fa82804a8f9d2907b7aec2009d440062c63f04abbdb825fce717a5e860", size = 341988, upload-time = "2026-01-20T20:37:22.881Z" }, - { url = "https://files.pythonhosted.org/packages/90/a4/01c1c7af5e6a44f20b40183e8dac37d6ed83e7dc9e8df85370a15959b804/uuid_utils-0.14.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c7343862a2359e0bd48a7f3dfb5105877a1728677818bb694d9f40703264a2db", size = 365784, upload-time = "2026-01-20T20:37:10.808Z" }, - { url = "https://files.pythonhosted.org/packages/04/f0/65ee43ec617b8b6b1bf2a5aecd56a069a08cca3d9340c1de86024331bde3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c51e4818fdb08ccec12dc7083a01f49507b4608770a0ab22368001685d59381b", size = 523750, upload-time = "2026-01-20T20:37:06.152Z" }, - { url = "https://files.pythonhosted.org/packages/95/d3/6bf503e3f135a5dfe705a65e6f89f19bccd55ac3fb16cb5d3ec5ba5388b8/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:181bbcccb6f93d80a8504b5bd47b311a1c31395139596edbc47b154b0685b533", size = 615818, upload-time = "2026-01-20T20:37:21.816Z" }, - { url = "https://files.pythonhosted.org/packages/df/6c/99937dd78d07f73bba831c8dc9469dfe4696539eba2fc269ae1b92752f9e/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:5c8ae96101c3524ba8dbf762b6f05e9e9d896544786c503a727c5bf5cb9af1a7", size = 580831, upload-time = "2026-01-20T20:37:19.691Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/fa/bbc9e2c25abd09a293b9b097a0d8fc16acd6a92854f0ec080f1ea7ad8bb3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00ac3c6edfdaff7e1eed041f4800ae09a3361287be780d7610a90fdcde9befdc", size = 546333, upload-time = "2026-01-20T20:37:03.117Z" }, - { url = "https://files.pythonhosted.org/packages/e7/9b/e5e99b324b1b5f0c62882230455786df0bc66f67eff3b452447e703f45d2/uuid_utils-0.14.0-cp39-abi3-win32.whl", hash = "sha256:ec2fd80adf8e0e6589d40699e6f6df94c93edcc16dd999be0438dd007c77b151", size = 177319, upload-time = "2026-01-20T20:37:04.208Z" }, - { url = "https://files.pythonhosted.org/packages/d3/28/2c7d417ea483b6ff7820c948678fdf2ac98899dc7e43bb15852faa95acaf/uuid_utils-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:efe881eb43a5504fad922644cb93d725fd8a6a6d949bd5a4b4b7d1a1587c7fd1", size = 182566, upload-time = "2026-01-20T20:37:16.868Z" }, - { url = "https://files.pythonhosted.org/packages/b8/86/49e4bdda28e962fbd7266684171ee29b3d92019116971d58783e51770745/uuid_utils-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:32b372b8fd4ebd44d3a219e093fe981af4afdeda2994ee7db208ab065cfcd080", size = 182809, upload-time = "2026-01-20T20:37:05.139Z" }, - { url = "https://files.pythonhosted.org/packages/f1/03/1f1146e32e94d1f260dfabc81e1649102083303fb4ad549775c943425d9a/uuid_utils-0.14.0-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:762e8d67992ac4d2454e24a141a1c82142b5bde10409818c62adbe9924ebc86d", size = 587430, upload-time = "2026-01-20T20:37:24.998Z" }, - { url = "https://files.pythonhosted.org/packages/87/ba/d5a7469362594d885fd9219fe9e851efbe65101d3ef1ef25ea321d7ce841/uuid_utils-0.14.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:40be5bf0b13aa849d9062abc86c198be6a25ff35316ce0b89fc25f3bac6d525e", size = 298106, upload-time = "2026-01-20T20:37:23.896Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/11/3dafb2a5502586f59fd49e93f5802cd5face82921b3a0f3abb5f357cb879/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:191a90a6f3940d1b7322b6e6cceff4dd533c943659e0a15f788674407856a515", size = 333423, upload-time = "2026-01-20T20:37:17.828Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f2/c8987663f0cdcf4d717a36d85b5db2a5589df0a4e129aa10f16f4380ef48/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4aa4525f4ad82f9d9c842f9a3703f1539c1808affbaec07bb1b842f6b8b96aa5", size = 338659, upload-time = "2026-01-20T20:37:14.286Z" }, - { url = "https://files.pythonhosted.org/packages/d1/c8/929d81665d83f0b2ffaecb8e66c3091a50f62c7cb5b65e678bd75a96684e/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdbd82ff20147461caefc375551595ecf77ebb384e46267f128aca45a0f2cdfc", size = 467029, upload-time = "2026-01-20T20:37:08.277Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a0/27d7daa1bfed7163f4ccaf52d7d2f4ad7bb1002a85b45077938b91ee584f/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff57e8a5d540006ce73cf0841a643d445afe78ba12e75ac53a95ca2924a56be", size = 333298, upload-time = "2026-01-20T20:37:07.271Z" }, - { url = "https://files.pythonhosted.org/packages/63/d4/acad86ce012b42ce18a12f31ee2aa3cbeeb98664f865f05f68c882945913/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3fd9112ca96978361201e669729784f26c71fecc9c13a7f8a07162c31bd4d1e2", size = 359217, upload-time = "2026-01-20T20:36:59.687Z" }, + { url = "https://files.pythonhosted.org/packages/43/b7/add4363039a34506a58457d96d4aa2126061df3a143eb4d042aedd6a2e76/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = 
"sha256:93a3b5dc798a54a1feb693f2d1cb4cf08258c32ff05ae4929b5f0a2ca624a4f0", size = 604679, upload-time = "2026-02-20T22:50:27.469Z" }, + { url = "https://files.pythonhosted.org/packages/dd/84/d1d0bef50d9e66d31b2019997c741b42274d53dde2e001b7a83e9511c339/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ccd65a4b8e83af23eae5e56d88034b2fe7264f465d3e830845f10d1591b81741", size = 309346, upload-time = "2026-02-20T22:50:31.857Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ed/b6d6fd52a6636d7c3eddf97d68da50910bf17cd5ac221992506fb56cf12e/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b56b0cacd81583834820588378e432b0696186683b813058b707aedc1e16c4b1", size = 344714, upload-time = "2026-02-20T22:50:42.642Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a7/a19a1719fb626fe0b31882db36056d44fe904dc0cf15b06fdf56b2679cf7/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb3cf14de789097320a3c56bfdfdd51b1225d11d67298afbedee7e84e3837c96", size = 350914, upload-time = "2026-02-20T22:50:36.487Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fc/f6690e667fdc3bb1a73f57951f97497771c56fe23e3d302d7404be394d4f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e0854a90d67f4b0cc6e54773deb8be618f4c9bad98d3326f081423b5d14fae", size = 482609, upload-time = "2026-02-20T22:50:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/54/6e/dcd3fa031320921a12ec7b4672dea3bd1dd90ddffa363a91831ba834d559/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862", size = 345699, upload-time = "2026-02-20T22:50:46.87Z" }, + { url = "https://files.pythonhosted.org/packages/04/28/e5220204b58b44ac0047226a9d016a113fde039280cc8732d9e6da43b39f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:043fb58fde6cf1620a6c066382f04f87a8e74feb0f95a585e4ed46f5d44af57b", size = 372205, upload-time = "2026-02-20T22:50:28.438Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d9/3d2eb98af94b8dfffc82b6a33b4dfc87b0a5de2c68a28f6dde0db1f8681b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c915d53f22945e55fe0d3d3b0b87fd965a57f5fd15666fd92d6593a73b1dd297", size = 521836, upload-time = "2026-02-20T22:50:23.057Z" }, + { url = "https://files.pythonhosted.org/packages/a8/15/0eb106cc6fe182f7577bc0ab6e2f0a40be247f35c5e297dbf7bbc460bd02/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:0972488e3f9b449e83f006ead5a0e0a33ad4a13e4462e865b7c286ab7d7566a3", size = 625260, upload-time = "2026-02-20T22:50:25.949Z" }, + { url = "https://files.pythonhosted.org/packages/3c/17/f539507091334b109e7496830af2f093d9fc8082411eafd3ece58af1f8ba/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:1c238812ae0c8ffe77d8d447a32c6dfd058ea4631246b08b5a71df586ff08531", size = 587824, upload-time = "2026-02-20T22:50:35.225Z" }, + { url = "https://files.pythonhosted.org/packages/2e/c2/d37a7b2e41f153519367d4db01f0526e0d4b06f1a4a87f1c5dfca5d70a8b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:bec8f8ef627af86abf8298e7ec50926627e29b34fa907fcfbedb45aaa72bca43", size = 551407, upload-time = "2026-02-20T22:50:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/65/36/2d24b2cbe78547c6532da33fb8613debd3126eccc33a6374ab788f5e46e9/uuid_utils-0.14.1-cp39-abi3-win32.whl", hash = "sha256:b54d6aa6252d96bac1fdbc80d26ba71bad9f220b2724d692ad2f2310c22ef523", size = 183476, upload-time = "2026-02-20T22:50:32.745Z" }, + { url = "https://files.pythonhosted.org/packages/83/92/2d7e90df8b1a69ec4cff33243ce02b7a62f926ef9e2f0eca5a026889cd73/uuid_utils-0.14.1-cp39-abi3-win_amd64.whl", hash = "sha256:fc27638c2ce267a0ce3e06828aff786f91367f093c80625ee21dad0208e0f5ba", size = 187147, upload-time = "2026-02-20T22:50:45.807Z" }, + { url 
= "https://files.pythonhosted.org/packages/d9/26/529f4beee17e5248e37e0bc17a2761d34c0fa3b1e5729c88adb2065bae6e/uuid_utils-0.14.1-cp39-abi3-win_arm64.whl", hash = "sha256:b04cb49b42afbc4ff8dbc60cf054930afc479d6f4dd7f1ec3bbe5dbfdde06b7a", size = 188132, upload-time = "2026-02-20T22:50:41.718Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/6c64bdbf71f58ccde7919e00491812556f446a5291573af92c49a5e9aaef/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b197cd5424cf89fb019ca7f53641d05bfe34b1879614bed111c9c313b5574cd8", size = 591617, upload-time = "2026-02-20T22:50:24.532Z" }, + { url = "https://files.pythonhosted.org/packages/d0/f0/758c3b0fb0c4871c7704fef26a5bc861de4f8a68e4831669883bebe07b0f/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:12c65020ba6cb6abe1d57fcbfc2d0ea0506c67049ee031714057f5caf0f9bc9c", size = 303702, upload-time = "2026-02-20T22:50:40.687Z" }, + { url = "https://files.pythonhosted.org/packages/85/89/d91862b544c695cd58855efe3201f83894ed82fffe34500774238ab8eba7/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b5d2ad28063d422ccc2c28d46471d47b61a58de885d35113a8f18cb547e25bf", size = 337678, upload-time = "2026-02-20T22:50:39.768Z" }, + { url = "https://files.pythonhosted.org/packages/ee/6b/cf342ba8a898f1de024be0243fac67c025cad530c79ea7f89c4ce718891a/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da2234387b45fde40b0fedfee64a0ba591caeea9c48c7698ab6e2d85c7991533", size = 343711, upload-time = "2026-02-20T22:50:43.965Z" }, + { url = "https://files.pythonhosted.org/packages/b3/20/049418d094d396dfa6606b30af925cc68a6670c3b9103b23e6990f84b589/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50fffc2827348c1e48972eed3d1c698959e63f9d030aa5dd82ba451113158a62", size = 476731, upload-time = 
"2026-02-20T22:50:30.589Z" }, + { url = "https://files.pythonhosted.org/packages/77/a1/0857f64d53a90321e6a46a3d4cc394f50e1366132dcd2ae147f9326ca98b/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1dbe718765f70f5b7f9b7f66b6a937802941b1cc56bcf642ce0274169741e01", size = 338902, upload-time = "2026-02-20T22:50:33.927Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d0/5bf7cbf1ac138c92b9ac21066d18faf4d7e7f651047b700eb192ca4b9fdb/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:258186964039a8e36db10810c1ece879d229b01331e09e9030bc5dcabe231bd2", size = 364700, upload-time = "2026-02-20T22:50:21.732Z" }, ] [[package]] @@ -7681,17 +7807,18 @@ wheels = [ [[package]] name = "virtualenv" -version = "20.38.0" +version = "21.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, + { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/03/a94d404ca09a89a7301a7008467aed525d4cdeb9186d262154dd23208709/virtualenv-20.38.0.tar.gz", hash = "sha256:94f39b1abaea5185bf7ea5a46702b56f1d0c9aa2f41a6c2b8b0af4ddc74c10a7", size = 5864558, upload-time = "2026-02-19T07:48:02.385Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/c9/18d4b36606d6091844daa3bd93cf7dc78e6f5da21d9f21d06c221104b684/virtualenv-21.1.0.tar.gz", hash = "sha256:1990a0188c8f16b6b9cf65c9183049007375b26aad415514d377ccacf1e4fb44", size = 5840471, upload-time = "2026-02-27T08:49:29.702Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/394801755d4c8684b655d35c665aea7836ec68320304f62ab3c94395b442/virtualenv-20.38.0-py3-none-any.whl", hash = "sha256:d6e78e5889de3a4742df2d3d44e779366325a90cf356f15621fddace82431794", size = 5837778, upload-time = "2026-02-19T07:47:59.778Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/55/896b06bf93a49bec0f4ae2a6f1ed12bd05c8860744ac3a70eda041064e4d/virtualenv-21.1.0-py3-none-any.whl", hash = "sha256:164f5e14c5587d170cf98e60378eb91ea35bf037be313811905d3a24ea33cc07", size = 5825072, upload-time = "2026-02-27T08:49:27.516Z" }, ] [[package]] From ece434383991f477d725353c2e503842d6542963 Mon Sep 17 00:00:00 2001 From: Om Chauhan Date: Sun, 1 Mar 2026 12:25:42 +0530 Subject: [PATCH 163/189] changed log level to debug --- src/pipecat/services/grok/realtime/llm.py | 2 +- src/pipecat/services/openai/realtime/llm.py | 2 +- src/pipecat/services/openai_realtime_beta/openai.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index 0d3687a26..f717e408b 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -512,7 +512,7 @@ class GrokRealtimeLLMService(LLMService): await self._handle_evt_function_call_arguments_done(evt) elif evt.type == "error": if evt.error.code == "response_cancel_not_active": - logger.warning(f"Non-fatal API error: {evt.error.message}") + logger.debug(f"{self} {evt.error.message}") else: await self._handle_evt_error(evt) return diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index ebd1fbdbc..57efafbef 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -578,7 +578,7 @@ class OpenAIRealtimeLLMService(LLMService): elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): if evt.error.code == "response_cancel_not_active": - logger.warning(f"Non-fatal API error: {evt.error.message}") + logger.debug(f"{self} {evt.error.message}") else: await self._handle_evt_error(evt) # errors are fatal, so exit the receive loop diff --git a/src/pipecat/services/openai_realtime_beta/openai.py 
b/src/pipecat/services/openai_realtime_beta/openai.py index 808fbb053..ffa1f4207 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -504,7 +504,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): if evt.error.code == "response_cancel_not_active": - logger.warning(f"Non-fatal API error: {evt.error.message}") + logger.debug(f"{self} {evt.error.message}") else: await self._handle_evt_error(evt) # errors are fatal, so exit the receive loop From 91c46ffbf49609bffe07eb5801a97f77b88734a1 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sun, 1 Mar 2026 16:30:49 -0500 Subject: [PATCH 164/189] Re-inject turn completion instructions after LLM context reset When filter_incomplete_user_turns is enabled and an LLMMessagesUpdateFrame replaces the context via set_messages(), the turn completion instructions system message was lost. This caused the LLM to stop emitting turn completion markers. Re-inject the instructions after set_messages() to fix this. --- changelog/3888.fixed.md | 1 + .../aggregators/llm_response_universal.py | 3 +++ tests/test_context_aggregators_universal.py | 23 +++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 changelog/3888.fixed.md diff --git a/changelog/3888.fixed.md b/changelog/3888.fixed.md new file mode 100644 index 000000000..99e9ad0e0 --- /dev/null +++ b/changelog/3888.fixed.md @@ -0,0 +1 @@ +- Fixed turn completion instructions being lost when `LLMMessagesUpdateFrame` replaces the LLM context. When `filter_incomplete_user_turns` is enabled, the turn completion system message is now re-injected after context replacement. 
diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index c43cc279d..96f3702be 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -642,6 +642,9 @@ class LLMUserAggregator(LLMContextAggregator): async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame): self.set_messages(frame.messages) + if self._params.filter_incomplete_user_turns: + config = self._params.user_turn_completion_config or UserTurnCompletionConfig() + self._context.add_message({"role": "system", "content": config.completion_instructions}) if frame.run_llm: await self.push_context_frame() diff --git a/tests/test_context_aggregators_universal.py b/tests/test_context_aggregators_universal.py index e86905e1c..b22abf6c6 100644 --- a/tests/test_context_aggregators_universal.py +++ b/tests/test_context_aggregators_universal.py @@ -50,6 +50,7 @@ from pipecat.turns.user_mute import ( MuteUntilFirstBotCompleteUserMuteStrategy, ) from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig from pipecat.turns.user_turn_strategies import UserTurnStrategies USER_TURN_STOP_TIMEOUT = 0.2 @@ -155,6 +156,28 @@ class TestLLMUserAggregator(unittest.IsolatedAsyncioTestCase): ) assert context.messages[0]["content"] == "Hi there!" 
+ async def test_llm_messages_update_reinjects_turn_completion_instructions(self): + context = LLMContext() + params = LLMUserAggregatorParams(filter_incomplete_user_turns=True) + pipeline = Pipeline([LLMUserAggregator(context, params=params)]) + + new_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"}, + ] + frames_to_send = [LLMMessagesUpdateFrame(messages=new_messages)] + await run_test( + pipeline, + frames_to_send=frames_to_send, + ) + config = UserTurnCompletionConfig() + # The context should contain the new messages plus the re-injected instructions + assert len(context.messages) == 3 + assert context.messages[0]["content"] == "You are a helpful assistant." + assert context.messages[1]["content"] == "Hello!" + assert context.messages[2]["role"] == "system" + assert context.messages[2]["content"] == config.completion_instructions + async def test_default_user_turn_strategies(self): context = LLMContext() user_aggregator = LLMUserAggregator( From 55a641e2584d14c2dfff6fe65561331b08229646 Mon Sep 17 00:00:00 2001 From: dhruvladia-sarvam Date: Sun, 1 Mar 2026 22:10:27 +0530 Subject: [PATCH 165/189] fix(sarvam): standardize STT/TTS User-Agent headers --- src/pipecat/services/sarvam/stt.py | 34 +++++++++++++++++------------- src/pipecat/services/sarvam/tts.py | 11 ++++++---- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 379473c6f..f4ebf7574 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -266,15 +266,10 @@ class SarvamSTTService(STTService): # Initialize Sarvam SDK client self._sdk_headers = sdk_headers() - # NOTE: We avoid passing non-standard kwargs here because different sarvamai - # versions expose different constructor signatures (static type checkers - # complain otherwise). We instead inject headers best-effort below. 
- self._sarvam_client = AsyncSarvamAI(api_subscription_key=api_key) - for attr in ("default_headers", "_default_headers", "headers", "_headers"): - d = getattr(self._sarvam_client, attr, None) - if isinstance(d, dict): - d.update(self._sdk_headers) - break + # Pass Pipecat SDK headers directly at client construction time so they are + # merged by the Sarvam SDK's client wrapper and consistently applied to + # WebSocket handshake requests. + self._sarvam_client = AsyncSarvamAI(api_subscription_key=api_key, headers=self._sdk_headers) self._websocket_context = None self._socket_client = None self._receive_task = None @@ -517,20 +512,29 @@ class SarvamSTTService(STTService): connect_kwargs["prompt"] = self._settings.prompt def _connect_with_sdk_headers(connect_fn, **kwargs): - # Different SDK versions may use different kwarg names. # If prompt is unsupported at connect-time, retry without it. + # Headers are supplied through request_options because this is a + # documented SDK parameter that survives SDK signature changes. + request_options = {"additional_headers": self._sdk_headers} + logger.debug( + f"Sarvam STT connect request_options.additional_headers: " + f"{request_options['additional_headers']}" + ) attempts = [kwargs] if "prompt" in kwargs: attempts.append({k: v for k, v in kwargs.items() if k != "prompt"}) last_type_error = None for attempt_kwargs in attempts: - for header_kw in ("headers", "additional_headers", "extra_headers"): - try: - return connect_fn(**attempt_kwargs, **{header_kw: self._sdk_headers}) - except TypeError as e: - last_type_error = e try: + return connect_fn( + **attempt_kwargs, + request_options=request_options, + ) + except TypeError as e: + last_type_error = e + try: + # Fallback for SDK builds that don't expose request_options. 
return connect_fn(**attempt_kwargs) except TypeError as e: last_type_error = e diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 87604a9f9..e92ade2e5 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -1013,12 +1013,15 @@ class SarvamTTSService(InterruptibleTTSService): if self._websocket and self._websocket.state is State.OPEN: return + ws_additional_headers = { + "api-subscription-key": self._api_key, + **sdk_headers(), + } + self._websocket = await websocket_connect( self._websocket_url, - additional_headers={ - "api-subscription-key": self._api_key, - **sdk_headers(), - }, + additional_headers=ws_additional_headers, + user_agent_header=None, ) logger.debug("Connected to Sarvam TTS Websocket") await self._send_config() From 1242f1c10ef0404a03346c0489eceac749548732 Mon Sep 17 00:00:00 2001 From: dhruvladia-sarvam Date: Mon, 2 Mar 2026 17:29:03 +0530 Subject: [PATCH 166/189] changelog entry --- changelog/3886.other.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3886.other.md diff --git a/changelog/3886.other.md b/changelog/3886.other.md new file mode 100644 index 000000000..0e9fdafed --- /dev/null +++ b/changelog/3886.other.md @@ -0,0 +1 @@ +- Standardized Sarvam STT/TTS User-Agent header handling to consistently send Pipecat SDK identity in websocket requests. 
\ No newline at end of file From 018ead85514a4cab3de6519e9678ae83f448335b Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 27 Feb 2026 19:32:08 -0500 Subject: [PATCH 167/189] Changelog for PR 3873, docstrings change --- changelog/3873.added.md | 1 + src/pipecat/services/rime/tts.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog/3873.added.md diff --git a/changelog/3873.added.md b/changelog/3873.added.md new file mode 100644 index 000000000..ed01b8e5d --- /dev/null +++ b/changelog/3873.added.md @@ -0,0 +1 @@ +- Added support for the `speed_alpha` parameter to the `arcana` model in `RimeTTSService`. diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 2dbaf2760..944ff4e58 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -147,10 +147,10 @@ class RimeTTSService(AudioContextTTSService): Parameters: language: Language for synthesis. Defaults to English. segment: Text segmentation mode ("immediate", "bySentence", "never"). + speed_alpha: Speech speed multiplier. repetition_penalty: Token repetition penalty (arcana only). temperature: Sampling temperature (arcana only). top_p: Cumulative probability threshold (arcana only). - speed_alpha: Speech speed multiplier (mistv2 only). reduce_latency: Whether to reduce latency at potential quality cost (mistv2 only). pause_between_brackets: Whether to add pauses between bracketed content (mistv2 only). phonemize_between_brackets: Whether to phonemize bracketed content (mistv2 only). 
@@ -160,12 +160,12 @@ class RimeTTSService(AudioContextTTSService): language: Optional[Language] = Language.EN segment: Optional[str] = None + speed_alpha: Optional[float] = None # Arcana params repetition_penalty: Optional[float] = None temperature: Optional[float] = None top_p: Optional[float] = None # Mistv2 params - speed_alpha: Optional[float] = None reduce_latency: Optional[bool] = None pause_between_brackets: Optional[bool] = None phonemize_between_brackets: Optional[bool] = None @@ -230,12 +230,12 @@ class RimeTTSService(AudioContextTTSService): else None, segment=params.segment, inlineSpeedAlpha=None, # Not applicable here + speedAlpha=params.speed_alpha, # Arcana params repetition_penalty=params.repetition_penalty, temperature=params.temperature, top_p=params.top_p, # Mistv2 params - speedAlpha=params.speed_alpha, reduceLatency=params.reduce_latency, pauseBetweenBrackets=params.pause_between_brackets, phonemizeBetweenBrackets=params.phonemize_between_brackets, From 442ea6a97e0c3835757f5651f1ea7f37d5485930 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Thu, 26 Feb 2026 19:36:21 -0800 Subject: [PATCH 168/189] Fix Smart Turn v3 producing incorrect predictions at non-16kHz sample rates The Whisper-based ONNX model expects 16 kHz audio, but the _predict_endpoint method had five hardcoded references to 16000 without checking the actual pipeline sample rate. When running at 8 kHz (e.g. Twilio telephony), audio was fed to the feature extractor at the wrong rate, causing the model to perceive speech at 2x speed with shifted formant frequencies and produce incorrect end-of-turn predictions. Add automatic resampling via numpy interpolation before feature extraction and replace all hardcoded sample rate values with a _MODEL_SAMPLE_RATE constant. Also fix the WAV debug logger to write files with the correct sample rate header. 
Fixes #3844 --- .../turn/smart_turn/local_smart_turn_v3.py | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index b9e2a7663..01c3746c8 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -20,6 +20,9 @@ from transformers import WhisperFeatureExtractor from pipecat.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn from pipecat.utils.env import env_truthy +# The Whisper-based ONNX model expects 16 kHz audio input. +_MODEL_SAMPLE_RATE = 16000 + class LocalSmartTurnAnalyzerV3(BaseSmartTurn): """Local turn analyzer using the smart-turn-v3 ONNX model. @@ -42,6 +45,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): super().__init__(**kwargs) self._log_data = env_truthy("PIPECAT_SMART_TURN_LOG_DATA", default=False) + self._resample_warned = False if not smart_turn_model_path: # Load bundled model @@ -77,7 +81,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): logger.debug("Loaded Local Smart Turn v3.x") def _write_audio_to_wav( - self, audio_array: np.ndarray, sample_rate: int = 16000, suffix: str = "" + self, audio_array: np.ndarray, sample_rate: int = _MODEL_SAMPLE_RATE, suffix: str = "" ) -> None: """Write audio data to a WAV file in a background thread. @@ -119,10 +123,39 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): thread = threading.Thread(target=write_wav, daemon=True) thread.start() + def _resample_to_model_rate(self, audio_array: np.ndarray) -> np.ndarray: + """Resample audio to the model's expected sample rate (16 kHz). + + Args: + audio_array: Audio data as a float32 numpy array. + + Returns: + Resampled audio array at 16 kHz. 
+ """ + actual_rate = self._sample_rate or _MODEL_SAMPLE_RATE + if actual_rate == _MODEL_SAMPLE_RATE: + return audio_array + + if not self._resample_warned: + logger.warning( + f"Smart Turn v3 model expects {_MODEL_SAMPLE_RATE}Hz audio but received " + f"{actual_rate}Hz. Audio will be resampled automatically." + ) + self._resample_warned = True + + num_output_samples = int(len(audio_array) * _MODEL_SAMPLE_RATE / actual_rate) + return np.interp( + np.linspace(0, len(audio_array), num_output_samples, endpoint=False), + np.arange(len(audio_array)), + audio_array, + ) + def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: """Predict end-of-turn using local ONNX model.""" - def truncate_audio_to_last_n_seconds(audio_array, n_seconds=8, sample_rate=16000): + def truncate_audio_to_last_n_seconds( + audio_array, n_seconds=8, sample_rate=_MODEL_SAMPLE_RATE + ): """Truncate audio to last n seconds or pad with zeros to meet n seconds.""" max_samples = n_seconds * sample_rate if len(audio_array) > max_samples: @@ -134,6 +167,10 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): return audio_array audio_for_logging = audio_array + actual_rate = self._sample_rate or _MODEL_SAMPLE_RATE + + # Resample to 16 kHz if the pipeline uses a different sample rate + audio_array = self._resample_to_model_rate(audio_array) # Truncate to 8 seconds (keeping the end) or pad to 8 seconds audio_array = truncate_audio_to_last_n_seconds(audio_array, n_seconds=8) @@ -141,10 +178,10 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): # Process audio using Whisper's feature extractor inputs = self._feature_extractor( audio_array, - sampling_rate=16000, + sampling_rate=_MODEL_SAMPLE_RATE, return_tensors="np", padding="max_length", - max_length=8 * 16000, + max_length=8 * _MODEL_SAMPLE_RATE, truncation=True, do_normalize=True, ) @@ -164,7 +201,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): if self._log_data: suffix = "_complete" if prediction == 1 else "_incomplete" - 
self._write_audio_to_wav(audio_for_logging, sample_rate=16000, suffix=suffix) + self._write_audio_to_wav(audio_for_logging, sample_rate=actual_rate, suffix=suffix) return { "prediction": prediction, From a7f6db84365e8442df056bdf3540970d0a9225b0 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Thu, 26 Feb 2026 19:36:50 -0800 Subject: [PATCH 169/189] Add changelog fragment for #3857 --- changelog/3857.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3857.fixed.md diff --git a/changelog/3857.fixed.md b/changelog/3857.fixed.md new file mode 100644 index 000000000..869c54111 --- /dev/null +++ b/changelog/3857.fixed.md @@ -0,0 +1 @@ +- Fixed `LocalSmartTurnAnalyzerV3` producing incorrect end-of-turn predictions at non-16kHz sample rates (e.g. 8kHz Twilio telephony) by adding automatic resampling to 16kHz before Whisper feature extraction. From 5e8d722bf25ed82560ccb26f5018598313da3100 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Fri, 27 Feb 2026 11:46:39 -0800 Subject: [PATCH 170/189] Use soxr for high-quality audio resampling instead of numpy linear interpolation --- src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index 01c3746c8..ffe714641 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -14,6 +14,7 @@ from typing import Any, Dict, Optional import numpy as np import onnxruntime as ort +import soxr from loguru import logger from transformers import WhisperFeatureExtractor @@ -143,12 +144,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): ) self._resample_warned = True - num_output_samples = int(len(audio_array) * _MODEL_SAMPLE_RATE / actual_rate) - return np.interp( - np.linspace(0, len(audio_array), num_output_samples, endpoint=False), - np.arange(len(audio_array)), - 
audio_array, - ) + return soxr.resample(audio_array, actual_rate, _MODEL_SAMPLE_RATE, quality="VHQ") def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: """Predict end-of-turn using local ONNX model.""" From ad74d19c6ba0178d8b0fa3bdcab27737d5f78c40 Mon Sep 17 00:00:00 2001 From: Rupesh Date: Sun, 1 Mar 2026 13:19:42 -0800 Subject: [PATCH 171/189] Remove resampling warning log for consistency with rest of codebase --- src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index ffe714641..a8cc249fd 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -46,7 +46,6 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): super().__init__(**kwargs) self._log_data = env_truthy("PIPECAT_SMART_TURN_LOG_DATA", default=False) - self._resample_warned = False if not smart_turn_model_path: # Load bundled model @@ -137,13 +136,6 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): if actual_rate == _MODEL_SAMPLE_RATE: return audio_array - if not self._resample_warned: - logger.warning( - f"Smart Turn v3 model expects {_MODEL_SAMPLE_RATE}Hz audio but received " - f"{actual_rate}Hz. Audio will be resampled automatically." - ) - self._resample_warned = True - return soxr.resample(audio_array, actual_rate, _MODEL_SAMPLE_RATE, quality="VHQ") def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: From 07ba2550733b21598f5846e392844e649c48f372 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 09:20:24 -0500 Subject: [PATCH 172/189] Fix update-docs workflow OIDC failure with pull_request_target The switch from pull_request to pull_request_target (for fork PR secret access) broke claude-code-action default OIDC-based GitHub App authentication. Pass github_token explicitly to bypass OIDC. 
--- .github/workflows/update-docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml index a9066762d..d26862766 100644 --- a/.github/workflows/update-docs.yml +++ b/.github/workflows/update-docs.yml @@ -59,6 +59,7 @@ jobs: DOCS_SYNC_TOKEN: ${{ secrets.DOCS_SYNC_TOKEN }} with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ secrets.GITHUB_TOKEN }} prompt: | You are updating documentation for the pipecat-ai/docs repository based on changes merged in PR #${{ steps.pr.outputs.number }} of pipecat-ai/pipecat. From f386722ef9dfd038ae1fe4100b7671086cd02023 Mon Sep 17 00:00:00 2001 From: dhruvladia-sarvam Date: Mon, 2 Mar 2026 20:38:39 +0530 Subject: [PATCH 173/189] removing unnecessary logs --- src/pipecat/services/sarvam/stt.py | 5 +---- src/pipecat/services/sarvam/tts.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index f4ebf7574..9e245aece 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -516,10 +516,7 @@ class SarvamSTTService(STTService): # Headers are supplied through request_options because this is a # documented SDK parameter that survives SDK signature changes. 
request_options = {"additional_headers": self._sdk_headers} - logger.debug( - f"Sarvam STT connect request_options.additional_headers: " - f"{request_options['additional_headers']}" - ) + attempts = [kwargs] if "prompt" in kwargs: attempts.append({k: v for k, v in kwargs.items() if k != "prompt"}) diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index e92ade2e5..c18933407 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -1021,7 +1021,6 @@ class SarvamTTSService(InterruptibleTTSService): self._websocket = await websocket_connect( self._websocket_url, additional_headers=ws_additional_headers, - user_agent_header=None, ) logger.debug("Connected to Sarvam TTS Websocket") await self._send_config() From c54232bdb40c28c7e28e1600bd3492212ff4b6cc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sat, 28 Feb 2026 14:04:21 -0500 Subject: [PATCH 174/189] Add StartupTimingObserver for measuring processor start() times Tracks how long each processor start method takes during pipeline startup by measuring StartFrame arrive/leave deltas. Emits a timing report via the on_startup_timing_report event and auto-logs a summary. Internal pipeline processors are excluded from reports by default. 
--- .../foundational/29-turn-tracking-observer.py | 12 +- .../observers/startup_timing_observer.py | 232 ++++++++++++++++++ tests/test_startup_timing_observer.py | 186 ++++++++++++++ 3 files changed, 427 insertions(+), 3 deletions(-) create mode 100644 src/pipecat/observers/startup_timing_observer.py create mode 100644 tests/test_startup_timing_observer.py diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index 321197db2..3e85ddfb8 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -12,6 +12,7 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.observers.startup_timing_observer import StartupTimingObserver from pipecat.observers.user_bot_latency_observer import UserBotLatencyObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -87,8 +88,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] ) - # Create latency tracking observer latency_observer = UserBotLatencyObserver() + startup_observer = StartupTimingObserver() task = PipelineTask( pipeline, @@ -97,14 +98,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - observers=[latency_observer], + observers=[latency_observer, startup_observer], ) - # Log latency measurements using the event handler @latency_observer.event_handler("on_latency_measured") async def on_latency_measured(observer, latency_seconds): logger.info(f"⏱️ User-to-bot latency: {latency_seconds:.3f}s") + @startup_observer.event_handler("on_startup_timing_report") + async def on_startup_timing_report(observer, report): + logger.info(f"Total startup: {report.total_duration_secs:.3f}s") + for timing in report.processor_timings: 
+ logger.info(f" {timing.processor_name}: {timing.duration_secs:.3f}s") + turn_observer = task.turn_tracking_observer if turn_observer: diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py new file mode 100644 index 000000000..0f3ad0b7a --- /dev/null +++ b/src/pipecat/observers/startup_timing_observer.py @@ -0,0 +1,232 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Observer for tracking pipeline startup timing. + +This module provides an observer that measures how long each processor's +``start()`` method takes during pipeline startup. It works by tracking +when a ``StartFrame`` arrives at a processor (``on_process_frame``) versus +when it leaves (``on_push_frame``), giving the exact ``start()`` duration +for each processor in the pipeline. + +Example:: + + observer = StartupTimingObserver() + + @observer.event_handler("on_startup_timing_report") + async def on_report(observer, report): + for t in report.processor_timings: + print(f"{t.processor_name}: {t.duration_secs:.3f}s") + + task = PipelineTask(pipeline, observers=[observer]) +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple, Type + +from loguru import logger + +from pipecat.frames.frames import StartFrame +from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed +from pipecat.pipeline.base_pipeline import BasePipeline +from pipecat.pipeline.pipeline import PipelineSink, PipelineSource +from pipecat.processors.frame_processor import FrameDirection, FrameProcessor + +# Internal pipeline types excluded from tracking by default. +_INTERNAL_TYPES = (PipelineSink, PipelineSource, BasePipeline) + + +@dataclass +class ProcessorStartupTiming: + """Startup timing for a single processor. + + Parameters: + processor_name: The name of the processor. + duration_secs: How long the processor's start() took, in seconds. 
+ """ + + processor_name: str + duration_secs: float + + +@dataclass +class StartupTimingReport: + """Report of startup timings for all measured processors. + + Parameters: + total_duration_secs: Total wall-clock time from first to last processor start. + processor_timings: Per-processor timing data, in pipeline order. + """ + + total_duration_secs: float + processor_timings: List[ProcessorStartupTiming] = field(default_factory=list) + + +class StartupTimingObserver(BaseObserver): + """Observer that measures processor startup times during pipeline initialization. + + Tracks how long each processor's ``start()`` method takes by measuring the + time between when a ``StartFrame`` arrives at a processor and when it is + pushed downstream. This captures WebSocket connections, API authentication, + model loading, and other initialization work. + + By default, internal pipeline processors (``PipelineSource``, ``PipelineSink``, + ``Pipeline``) are excluded from the report. Pass ``processor_types`` to + measure only specific types. + + Event handlers available: + + - on_startup_timing_report: Called once after startup completes with the full + timing report. + + Example:: + + observer = StartupTimingObserver( + processor_types=(STTService, TTSService) + ) + + @observer.event_handler("on_startup_timing_report") + async def on_report(observer, report): + for t in report.processor_timings: + logger.info(f"{t.processor_name}: {t.duration_secs:.3f}s") + + task = PipelineTask(pipeline, observers=[observer]) + + Args: + processor_types: Optional tuple of processor types to measure. If None, + all non-internal processors are measured. + """ + + def __init__( + self, + *, + processor_types: Optional[Tuple[Type[FrameProcessor], ...]] = None, + **kwargs, + ): + """Initialize the startup timing observer. + + Args: + processor_types: Optional tuple of processor types to measure. + If None, all non-internal processors are measured. + **kwargs: Additional arguments passed to parent class. 
+ """ + super().__init__(**kwargs) + self._processor_types = processor_types + + # Map processor ID -> (processor, arrival_timestamp_ns) + self._arrivals: Dict[int, Tuple[FrameProcessor, int]] = {} + + # Collected timings in pipeline order. + self._timings: List[ProcessorStartupTiming] = [] + + # Lock onto the first StartFrame we see (by frame ID). + self._start_frame_id: Optional[str] = None + + # Whether we've already emitted the report. + self._reported = False + + self._register_event_handler("on_startup_timing_report") + + def _should_track(self, processor: FrameProcessor) -> bool: + """Check if a processor should be tracked for timing. + + Args: + processor: The processor to check. + + Returns: + True if the processor matches the filter or no filter is set. + """ + if self._processor_types is not None: + return isinstance(processor, self._processor_types) + # Default: exclude internal pipeline plumbing. + return not isinstance(processor, _INTERNAL_TYPES) + + async def on_process_frame(self, data: FrameProcessed): + """Record when a StartFrame arrives at a processor. + + When a ``StartFrame`` reaches a ``PipelineSink``, startup is complete + (the frame has traversed the entire pipeline) and the report is emitted. + + Args: + data: The frame processing event data. + """ + if self._reported: + return + + if not isinstance(data.frame, StartFrame): + return + + if data.direction != FrameDirection.DOWNSTREAM: + return + + # Lock onto the first StartFrame. + if self._start_frame_id is None: + self._start_frame_id = data.frame.id + elif data.frame.id != self._start_frame_id: + return + + # When the StartFrame reaches a PipelineSink, all processors have + # completed start(). PipelineSinks use direct mode so the outermost + # sink fires last within the same synchronous call chain. 
+ if isinstance(data.processor, PipelineSink): + if self._timings: + await self._emit_report() + return + + if self._should_track(data.processor): + self._arrivals[data.processor.id] = (data.processor, data.timestamp) + + async def on_push_frame(self, data: FramePushed): + """Record when a StartFrame leaves a processor and compute the delta. + + Args: + data: The frame push event data. + """ + if self._reported: + return + + if not isinstance(data.frame, StartFrame): + return + + if data.direction != FrameDirection.DOWNSTREAM: + return + + if self._start_frame_id is not None and data.frame.id != self._start_frame_id: + return + + arrival = self._arrivals.pop(data.source.id, None) + if arrival is None: + return + + processor, arrival_ts = arrival + duration_ns = data.timestamp - arrival_ts + duration_secs = duration_ns / 1e9 + + self._timings.append( + ProcessorStartupTiming( + processor_name=processor.name, + duration_secs=duration_secs, + ) + ) + + async def _emit_report(self): + """Build and emit the startup timing report.""" + if self._reported: + return + self._reported = True + + total = sum(t.duration_secs for t in self._timings) + + report = StartupTimingReport( + total_duration_secs=total, + processor_timings=self._timings, + ) + + logger.debug(f"Pipeline startup completed in {total:.3f}s") + for t in self._timings: + logger.debug(f" {t.processor_name}: {t.duration_secs:.3f}s") + + await self._call_event_handler("on_startup_timing_report", report) diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py new file mode 100644 index 000000000..e3cd7c2b7 --- /dev/null +++ b/tests/test_startup_timing_observer.py @@ -0,0 +1,186 @@ +import asyncio +import unittest + +from pipecat.frames.frames import Frame, StartFrame, TextFrame +from pipecat.observers.startup_timing_observer import ( + StartupTimingObserver, + StartupTimingReport, +) +from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from 
pipecat.tests.utils import run_test + + +class SlowStartProcessor(FrameProcessor): + """A processor that sleeps during start to simulate slow initialization.""" + + def __init__(self, delay: float = 0.1, **kwargs): + super().__init__(**kwargs) + self._delay = delay + + async def process_frame(self, frame: Frame, direction: FrameDirection): + await super().process_frame(frame, direction) + if isinstance(frame, StartFrame): + await asyncio.sleep(self._delay) + await self.push_frame(frame, direction) + + +class FastProcessor(FrameProcessor): + """A processor with no start delay.""" + + async def process_frame(self, frame: Frame, direction: FrameDirection): + await super().process_frame(frame, direction) + await self.push_frame(frame, direction) + + +class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): + """Tests for StartupTimingObserver.""" + + async def test_timing_reported(self): + """Test that startup timing is measured and reported.""" + observer = StartupTimingObserver() + processor = SlowStartProcessor(delay=0.1) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + self.assertGreater(report.total_duration_secs, 0) + self.assertGreater(len(report.processor_timings), 0) + + # Find our slow processor in the timings. 
+ slow_timings = [ + t for t in report.processor_timings if "SlowStartProcessor" in t.processor_name + ] + self.assertEqual(len(slow_timings), 1) + self.assertGreaterEqual(slow_timings[0].duration_secs, 0.05) + + async def test_processor_types_filter(self): + """Test that processor_types filter limits which processors appear.""" + observer = StartupTimingObserver(processor_types=(SlowStartProcessor,)) + processor = SlowStartProcessor(delay=0.05) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + + # Only SlowStartProcessor should be in the timings. + for t in report.processor_timings: + self.assertIn("SlowStartProcessor", t.processor_name) + + async def test_report_emits_once(self): + """Test that the report is emitted only once even with multiple frames.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [ + TextFrame(text="first"), + TextFrame(text="second"), + TextFrame(text="third"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame, TextFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + + async def test_event_handler_receives_report(self): + """Test that the event handler receives a proper StartupTimingReport.""" + observer = StartupTimingObserver() + processor = SlowStartProcessor(delay=0.05) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await 
run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + self.assertIsInstance(report, StartupTimingReport) + self.assertIsInstance(report.total_duration_secs, float) + for timing in report.processor_timings: + self.assertIsInstance(timing.processor_name, str) + self.assertIsInstance(timing.duration_secs, float) + + async def test_excludes_internal_processors(self): + """Test that internal pipeline processors are excluded by default.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + + # No internal processors (PipelineSource, PipelineSink, Pipeline) in the report. + internal_names = ("Pipeline#", "PipelineTask#") + for t in report.processor_timings: + for prefix in internal_names: + self.assertNotIn( + prefix, + t.processor_name, + f"Internal processor {t.processor_name} should be excluded by default", + ) + + +if __name__ == "__main__": + unittest.main() From e6b9c5c4dccbe5dd46ff9275bac19c136b15fb35 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 12:24:10 -0500 Subject: [PATCH 175/189] Propagate Azure TTS/STT cancellation errors to the pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Azure TTS _handle_canceled was putting None (the normal completion signal) into the audio queue for all cancellation reasons, so run_tts treated errors identically to success—silently producing no audio. Now error cancellations put an Exception marker in the queue, which run_tts converts to an ErrorFrame. 
Azure STT had no canceled event handler at all, so auth failures, network errors, and rate-limit cancellations were invisible. Added _on_handle_canceled which pushes an ErrorFrame upstream via push_error. Fixes pipecat-ai/pipecat#3892 --- changelog/3893.fixed.md | 1 + src/pipecat/services/azure/stt.py | 12 ++++++++++++ src/pipecat/services/azure/tts.py | 11 +++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 changelog/3893.fixed.md diff --git a/changelog/3893.fixed.md b/changelog/3893.fixed.md new file mode 100644 index 000000000..0209571e3 --- /dev/null +++ b/changelog/3893.fixed.md @@ -0,0 +1 @@ +- Fixed Azure TTS and STT services silently swallowing cancellation errors (invalid API key, network failures, rate limiting) instead of propagating them as `ErrorFrame`s to the pipeline. diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index c6cb96d2e..5533e350e 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -35,6 +35,7 @@ from pipecat.utils.tracing.service_decorators import traced_stt try: from azure.cognitiveservices.speech import ( + CancellationReason, ResultReason, SpeechConfig, SpeechRecognizer, @@ -209,6 +210,7 @@ class AzureSTTService(STTService): ) self._speech_recognizer.recognizing.connect(self._on_handle_recognizing) self._speech_recognizer.recognized.connect(self._on_handle_recognized) + self._speech_recognizer.canceled.connect(self._on_handle_canceled) self._speech_recognizer.start_continuous_recognition_async() except Exception as e: await self.push_error( @@ -280,3 +282,13 @@ class AzureSTTService(STTService): result=event, ) asyncio.run_coroutine_threadsafe(self.push_frame(frame), self.get_event_loop()) + + def _on_handle_canceled(self, event): + details = event.result.cancellation_details + if details.reason == CancellationReason.Error: + error_msg = f"Azure STT recognition canceled: {details.reason}" + if details.error_details: + error_msg += f" - 
{details.error_details}" + asyncio.run_coroutine_threadsafe( + self.push_error(error_msg=error_msg), self.get_event_loop() + ) diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index f68694eb5..6e62c73bf 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -561,9 +561,13 @@ class AzureTTSService(TTSService, AzureBaseTTSService): # User cancellation (from interruption) is expected, not an error if reason == CancellationReason.CancelledByUser: logger.debug(f"{self}: Speech synthesis canceled by user (interruption)") + self._audio_queue.put_nowait(None) else: - logger.warning(f"{self}: Speech synthesis canceled: {reason}") - self._audio_queue.put_nowait(None) + details = evt.result.cancellation_details + error_msg = f"Azure TTS synthesis canceled: {reason}" + if details.error_details: + error_msg += f" - {details.error_details}" + self._audio_queue.put_nowait(Exception(error_msg)) async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM): """Push a frame and handle state changes. @@ -676,6 +680,9 @@ class AzureTTSService(TTSService, AzureBaseTTSService): chunk = await self._audio_queue.get() if chunk is None: # End of stream break + if isinstance(chunk, Exception): # Error from _handle_canceled + yield ErrorFrame(error=str(chunk)) + break if self._first_chunk: await self.stop_ttfb_metrics() From 58aa8e1ba56a4817fcccffd234a8533257ba8cdf Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sat, 28 Feb 2026 14:05:25 -0500 Subject: [PATCH 176/189] Add changelog for #3881 --- changelog/3881.added.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3881.added.md diff --git a/changelog/3881.added.md b/changelog/3881.added.md new file mode 100644 index 000000000..694e052ce --- /dev/null +++ b/changelog/3881.added.md @@ -0,0 +1 @@ +- Added `StartupTimingObserver` for measuring how long each processor's `start()` method takes during pipeline startup. 
Useful for diagnosing cold start slowness and identifying initialization bottlenecks. From 08360668984a9ae158bae566d390c2a67d17b5d5 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sun, 1 Mar 2026 08:45:59 -0500 Subject: [PATCH 177/189] Add ClientConnectedFrame and transport readiness timing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce ClientConnectedFrame (SystemFrame) pushed by all transports when a client connects. StartupTimingObserver uses this to measure transport readiness — the time from StartFrame to first client connection — via a new on_transport_readiness_measured event. --- .../foundational/29-turn-tracking-observer.py | 4 + src/pipecat/frames/frames.py | 11 +++ .../observers/startup_timing_observer.py | 80 ++++++++++++++----- src/pipecat/transports/daily/transport.py | 3 + src/pipecat/transports/heygen/transport.py | 3 + src/pipecat/transports/livekit/transport.py | 3 + .../transports/smallwebrtc/transport.py | 3 + src/pipecat/transports/tavus/transport.py | 3 + src/pipecat/transports/websocket/fastapi.py | 2 + src/pipecat/transports/websocket/server.py | 4 +- tests/test_startup_timing_observer.py | 76 +++++++++++++++++- 11 files changed, 172 insertions(+), 20 deletions(-) diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index 3e85ddfb8..ad0b448e9 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -111,6 +111,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): for timing in report.processor_timings: logger.info(f" {timing.processor_name}: {timing.duration_secs:.3f}s") + @startup_observer.event_handler("on_transport_readiness_measured") + async def on_transport_readiness_measured(observer, report): + logger.info(f"Transport readiness: {report.readiness_secs:.3f}s") + turn_observer = task.turn_tracking_observer if turn_observer: 
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 126f3c001..b5e368c53 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1910,6 +1910,17 @@ class StopFrame(ControlFrame, UninterruptibleFrame): pass +@dataclass +class ClientConnectedFrame(SystemFrame): + """Frame indicating that a client has connected to the transport. + + Pushed downstream by the input transport when a client (participant) + connects. Used by observers to measure transport readiness timing. + """ + + pass + + @dataclass class OutputTransportReadyFrame(ControlFrame): """Frame indicating that the output transport is ready. diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index 0f3ad0b7a..d6b1c8fa9 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -12,6 +12,10 @@ when a ``StartFrame`` arrives at a processor (``on_process_frame``) versus when it leaves (``on_push_frame``), giving the exact ``start()`` duration for each processor in the pipeline. +It also measures transport readiness — the time from ``StartFrame`` to the +first ``ClientConnectedFrame`` — via a separate ``on_transport_readiness_measured`` +event. 
+ Example:: observer = StartupTimingObserver() @@ -21,6 +25,10 @@ Example:: for t in report.processor_timings: print(f"{t.processor_name}: {t.duration_secs:.3f}s") + @observer.event_handler("on_transport_readiness_measured") + async def on_readiness(observer, report): + print(f"Transport ready in {report.readiness_secs:.3f}s") + task = PipelineTask(pipeline, observers=[observer]) """ @@ -29,11 +37,11 @@ from typing import Dict, List, Optional, Tuple, Type from loguru import logger -from pipecat.frames.frames import StartFrame +from pipecat.frames.frames import ClientConnectedFrame, StartFrame from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed from pipecat.pipeline.base_pipeline import BasePipeline from pipecat.pipeline.pipeline import PipelineSink, PipelineSource -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.processors.frame_processor import FrameProcessor # Internal pipeline types excluded from tracking by default. _INTERNAL_TYPES = (PipelineSink, PipelineSource, BasePipeline) @@ -65,6 +73,17 @@ class StartupTimingReport: processor_timings: List[ProcessorStartupTiming] = field(default_factory=list) +@dataclass +class TransportReadinessReport: + """Time from pipeline start to first client connection. + + Parameters: + readiness_secs: Seconds from StartFrame to first ClientConnectedFrame. + """ + + readiness_secs: float + + class StartupTimingObserver(BaseObserver): """Observer that measures processor startup times during pipeline initialization. @@ -73,6 +92,10 @@ class StartupTimingObserver(BaseObserver): pushed downstream. This captures WebSocket connections, API authentication, model loading, and other initialization work. + Also measures transport readiness — the time from ``StartFrame`` to the + first ``ClientConnectedFrame`` — indicating how long it takes for a client + to connect after the pipeline starts. 
+ By default, internal pipeline processors (``PipelineSource``, ``PipelineSink``, ``Pipeline``) are excluded from the report. Pass ``processor_types`` to measure only specific types. @@ -81,6 +104,8 @@ class StartupTimingObserver(BaseObserver): - on_startup_timing_report: Called once after startup completes with the full timing report. + - on_transport_readiness_measured: Called once when the first client connects with the + transport readiness timing. Example:: @@ -93,6 +118,10 @@ class StartupTimingObserver(BaseObserver): for t in report.processor_timings: logger.info(f"{t.processor_name}: {t.duration_secs:.3f}s") + @observer.event_handler("on_transport_readiness_measured") + async def on_readiness(observer, report): + logger.info(f"Transport ready in {report.readiness_secs:.3f}s") + task = PipelineTask(pipeline, observers=[observer]) Args: @@ -125,10 +154,17 @@ class StartupTimingObserver(BaseObserver): # Lock onto the first StartFrame we see (by frame ID). self._start_frame_id: Optional[str] = None - # Whether we've already emitted the report. - self._reported = False + # Whether we've already emitted the startup timing report. + self._startup_timing_reported = False + + # Whether we've already measured transport readiness. + self._transport_readiness_measured = False + + # Timestamp (ns) when we first see a StartFrame arrive at a processor. + self._start_frame_arrival_ns: Optional[int] = None self._register_event_handler("on_startup_timing_report") + self._register_event_handler("on_transport_readiness_measured") def _should_track(self, processor: FrameProcessor) -> bool: """Check if a processor should be tracked for timing. @@ -153,18 +189,16 @@ class StartupTimingObserver(BaseObserver): Args: data: The frame processing event data. """ - if self._reported: + if self._startup_timing_reported: return if not isinstance(data.frame, StartFrame): return - if data.direction != FrameDirection.DOWNSTREAM: - return - # Lock onto the first StartFrame. 
if self._start_frame_id is None: self._start_frame_id = data.frame.id + self._start_frame_arrival_ns = data.timestamp elif data.frame.id != self._start_frame_id: return @@ -182,18 +216,21 @@ class StartupTimingObserver(BaseObserver): async def on_push_frame(self, data: FramePushed): """Record when a StartFrame leaves a processor and compute the delta. + Also handles ``ClientConnectedFrame`` to measure transport readiness. + Args: data: The frame push event data. """ - if self._reported: + if isinstance(data.frame, ClientConnectedFrame): + await self._handle_client_connected(data) + return + + if self._startup_timing_reported: return if not isinstance(data.frame, StartFrame): return - if data.direction != FrameDirection.DOWNSTREAM: - return - if self._start_frame_id is not None and data.frame.id != self._start_frame_id: return @@ -212,11 +249,22 @@ class StartupTimingObserver(BaseObserver): ) ) + async def _handle_client_connected(self, data: FramePushed): + """Measure transport readiness on first client connection.""" + if self._transport_readiness_measured or self._start_frame_arrival_ns is None: + return + + self._transport_readiness_measured = True + delta_ns = data.timestamp - self._start_frame_arrival_ns + readiness_secs = delta_ns / 1e9 + report = TransportReadinessReport(readiness_secs=readiness_secs) + await self._call_event_handler("on_transport_readiness_measured", report) + async def _emit_report(self): """Build and emit the startup timing report.""" - if self._reported: + if self._startup_timing_reported: return - self._reported = True + self._startup_timing_reported = True total = sum(t.duration_secs for t in self._timings) @@ -225,8 +273,4 @@ class StartupTimingObserver(BaseObserver): processor_timings=self._timings, ) - logger.debug(f"Pipeline startup completed in {total:.3f}s") - for t in self._timings: - logger.debug(f" {t.processor_name}: {t.duration_secs:.3f}s") - await self._call_event_handler("on_startup_timing_report", report) diff --git 
a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index 9575fd51b..cb24b23fa 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -25,6 +25,7 @@ from pydantic import BaseModel from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, DataFrame, EndFrame, Frame, @@ -2716,6 +2717,8 @@ class DailyTransport(BaseTransport): await self._call_event_handler("on_participant_joined", participant) # Also call on_client_connected for compatibility with other transports await self._call_event_handler("on_client_connected", participant) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_participant_left(self, participant, reason): """Handle participant left events.""" diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py index dbeded3e5..77ccda09f 100644 --- a/src/pipecat/transports/heygen/transport.py +++ b/src/pipecat/transports/heygen/transport.py @@ -26,6 +26,7 @@ from pipecat.frames.frames import ( BotStartedSpeakingFrame, BotStoppedSpeakingFrame, CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -387,6 +388,8 @@ class HeyGenTransport(BaseTransport): async def _on_client_connected(self, participant: Any): """Handle client connected events.""" await self._call_event_handler("on_client_connected", participant) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, participant: Any): """Handle client disconnected events.""" diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py index 1902e7cd3..e4435016c 100644 --- a/src/pipecat/transports/livekit/transport.py +++ b/src/pipecat/transports/livekit/transport.py @@ -24,6 +24,7 @@ from pipecat.audio.vad.vad_analyzer import VADAnalyzer from 
pipecat.frames.frames import ( AudioRawFrame, CancelFrame, + ClientConnectedFrame, EndFrame, ImageRawFrame, OutputAudioRawFrame, @@ -1143,6 +1144,8 @@ class LiveKitTransport(BaseTransport): async def _on_participant_connected(self, participant_id: str): """Handle participant connected events.""" await self._call_event_handler("on_participant_connected", participant_id) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_participant_disconnected(self, participant_id: str): """Handle participant disconnected events.""" diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index dc91588a3..36f883278 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -23,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -964,6 +965,8 @@ class SmallWebRTCTransport(BaseTransport): async def _on_client_connected(self, webrtc_connection): """Handle client connection events.""" await self._call_event_handler("on_client_connected", webrtc_connection) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, webrtc_connection): """Handle client disconnection events.""" diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index dd63cb790..114f33ca0 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -22,6 +22,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -786,6 +787,8 @@ class TavusTransport(BaseTransport): async def _on_client_connected(self, participant: Any): """Handle client connected events.""" await self._call_event_handler("on_client_connected", participant) + if 
self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, participant: Any): """Handle client disconnected events.""" diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py index f52123e52..0fde2b9ae 100644 --- a/src/pipecat/transports/websocket/fastapi.py +++ b/src/pipecat/transports/websocket/fastapi.py @@ -23,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -260,6 +261,7 @@ class FastAPIWebsocketInputTransport(BaseInputTransport): if not self._monitor_websocket_task and self._params.session_timeout: self._monitor_websocket_task = self.create_task(self._monitor_websocket()) await self._client.trigger_client_connected() + await self.push_frame(ClientConnectedFrame()) if not self._receive_task: self._receive_task = self.create_task(self._receive_messages()) await self.set_transport_ready(frame) diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py index e5f628fa4..fa3645d37 100644 --- a/src/pipecat/transports/websocket/server.py +++ b/src/pipecat/transports/websocket/server.py @@ -22,11 +22,11 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, InputTransportMessageFrame, - InputTransportMessageUrgentFrame, InterruptionFrame, OutputAudioRawFrame, OutputTransportMessageFrame, @@ -504,6 +504,8 @@ class WebsocketServerTransport(BaseTransport): if self._output: await self._output.set_client_connection(websocket) await self._call_event_handler("on_client_connected", websocket) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) else: logger.error("A WebsocketServerTransport output is missing in the pipeline") diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py index 
e3cd7c2b7..efabf5bc7 100644 --- a/tests/test_startup_timing_observer.py +++ b/tests/test_startup_timing_observer.py @@ -1,10 +1,11 @@ import asyncio import unittest -from pipecat.frames.frames import Frame, StartFrame, TextFrame +from pipecat.frames.frames import ClientConnectedFrame, Frame, StartFrame, TextFrame from pipecat.observers.startup_timing_observer import ( StartupTimingObserver, StartupTimingReport, + TransportReadinessReport, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.tests.utils import run_test @@ -181,6 +182,79 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): f"Internal processor {t.processor_name} should be excluded by default", ) + async def test_transport_readiness_measured(self): + """Test that ClientConnectedFrame after startup emits on_transport_readiness_measured.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + readiness_reports = [] + + @observer.event_handler("on_transport_readiness_measured") + async def on_readiness(obs, report): + readiness_reports.append(report) + + frames_to_send = [ClientConnectedFrame(), TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(readiness_reports), 1) + report = readiness_reports[0] + self.assertIsInstance(report, TransportReadinessReport) + self.assertGreater(report.readiness_secs, 0) + + async def test_transport_readiness_only_first(self): + """Test that only the first ClientConnectedFrame triggers the event.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + readiness_reports = [] + + @observer.event_handler("on_transport_readiness_measured") + async def on_readiness(obs, report): + readiness_reports.append(report) + + frames_to_send = [ + ClientConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + 
processor, + frames_to_send=frames_to_send, + expected_down_frames=[ClientConnectedFrame, ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(readiness_reports), 1) + + async def test_transport_readiness_without_start_frame(self): + """Test that ClientConnectedFrame before StartFrame does not crash.""" + observer = StartupTimingObserver() + + # Directly call on_push_frame with a ClientConnectedFrame before any + # StartFrame has been seen. This should be a no-op (no crash). + from pipecat.observers.base_observer import FramePushed + + processor = FastProcessor() + destination = FastProcessor() + data = FramePushed( + source=processor, + destination=destination, + frame=ClientConnectedFrame(), + direction=FrameDirection.DOWNSTREAM, + timestamp=1000, + ) + await observer.on_push_frame(data) + + # No event should have been emitted. + self.assertFalse(observer._transport_readiness_measured) + if __name__ == "__main__": unittest.main() From de87894778e541b0f174b0be639e50e0825c2887 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sun, 1 Mar 2026 08:47:40 -0500 Subject: [PATCH 178/189] Update changelog for #3881 --- changelog/3881.added.2.md | 1 + changelog/3881.added.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog/3881.added.2.md diff --git a/changelog/3881.added.2.md b/changelog/3881.added.2.md new file mode 100644 index 000000000..a5bda94c1 --- /dev/null +++ b/changelog/3881.added.2.md @@ -0,0 +1 @@ +- Added `ClientConnectedFrame`, a new `SystemFrame` pushed by all transports (Daily, LiveKit, FastAPI WebSocket, WebSocket Server, SmallWebRTC, HeyGen, Tavus) when a client connects. Enables observers to track transport readiness timing. 
diff --git a/changelog/3881.added.md b/changelog/3881.added.md index 694e052ce..cbf6d0293 100644 --- a/changelog/3881.added.md +++ b/changelog/3881.added.md @@ -1 +1 @@ -- Added `StartupTimingObserver` for measuring how long each processor's `start()` method takes during pipeline startup. Useful for diagnosing cold start slowness and identifying initialization bottlenecks. +- Added `StartupTimingObserver` for measuring how long each processor's `start()` method takes during pipeline startup. Also measures transport readiness — the time from `StartFrame` to first client connection — via the `on_transport_readiness_measured` event. Useful for diagnosing cold start slowness and identifying initialization bottlenecks. From 68e8732e72ba5f20ec3b03129f97d4fa2271b190 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 10:41:05 -0500 Subject: [PATCH 179/189] Add BotConnectedFrame and on_transport_timing_report event Add BotConnectedFrame (SystemFrame) pushed by SFU transports (Daily, LiveKit, HeyGen, Tavus) when the bot joins the room. Replace the on_transport_readiness_measured event with on_transport_timing_report which includes both bot_connected_secs and client_connected_secs. 
--- changelog/3881.added.3.md | 1 + .../foundational/29-turn-tracking-observer.py | 8 +- src/pipecat/frames/frames.py | 12 ++ .../observers/startup_timing_observer.py | 93 +++++++++----- src/pipecat/services/heygen/client.py | 7 +- src/pipecat/services/heygen/video.py | 5 + src/pipecat/services/tavus/video.py | 5 + src/pipecat/transports/daily/transport.py | 3 + src/pipecat/transports/heygen/transport.py | 9 ++ src/pipecat/transports/livekit/transport.py | 3 + src/pipecat/transports/tavus/transport.py | 12 ++ tests/test_startup_timing_observer.py | 114 +++++++++++++++--- 12 files changed, 215 insertions(+), 57 deletions(-) create mode 100644 changelog/3881.added.3.md diff --git a/changelog/3881.added.3.md b/changelog/3881.added.3.md new file mode 100644 index 000000000..cad26e876 --- /dev/null +++ b/changelog/3881.added.3.md @@ -0,0 +1 @@ +- Added `BotConnectedFrame` for SFU transports and `on_transport_timing_report` event to `StartupTimingObserver` with bot and client connection timing. 
diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index ad0b448e9..4af28f1ed 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -111,9 +111,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): for timing in report.processor_timings: logger.info(f" {timing.processor_name}: {timing.duration_secs:.3f}s") - @startup_observer.event_handler("on_transport_readiness_measured") - async def on_transport_readiness_measured(observer, report): - logger.info(f"Transport readiness: {report.readiness_secs:.3f}s") + @startup_observer.event_handler("on_transport_timing_report") + async def on_transport_timing_report(observer, report): + if report.bot_connected_secs is not None: + logger.info(f"Bot connected: {report.bot_connected_secs:.3f}s") + logger.info(f"Client connected: {report.client_connected_secs:.3f}s") turn_observer = task.turn_tracking_observer if turn_observer: diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index b5e368c53..86778e564 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1910,6 +1910,18 @@ class StopFrame(ControlFrame, UninterruptibleFrame): pass +@dataclass +class BotConnectedFrame(SystemFrame): + """Frame indicating the bot has connected to the transport service. + + Pushed downstream by SFU transports (Daily, LiveKit, HeyGen, Tavus) + when the bot successfully joins the room. Non-SFU transports do not + emit this frame. + """ + + pass + + @dataclass class ClientConnectedFrame(SystemFrame): """Frame indicating that a client has connected to the transport. 
diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index d6b1c8fa9..555a10cb0 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -12,9 +12,9 @@ when a ``StartFrame`` arrives at a processor (``on_process_frame``) versus when it leaves (``on_push_frame``), giving the exact ``start()`` duration for each processor in the pipeline. -It also measures transport readiness — the time from ``StartFrame`` to the -first ``ClientConnectedFrame`` — via a separate ``on_transport_readiness_measured`` -event. +It also measures transport timing — the time from ``StartFrame`` to the +first ``BotConnectedFrame`` (SFU transports only) and ``ClientConnectedFrame`` +— via a separate ``on_transport_timing_report`` event. Example:: @@ -25,9 +25,11 @@ Example:: for t in report.processor_timings: print(f"{t.processor_name}: {t.duration_secs:.3f}s") - @observer.event_handler("on_transport_readiness_measured") - async def on_readiness(observer, report): - print(f"Transport ready in {report.readiness_secs:.3f}s") + @observer.event_handler("on_transport_timing_report") + async def on_transport(observer, report): + if report.bot_connected_secs is not None: + print(f"Bot connected in {report.bot_connected_secs:.3f}s") + print(f"Client connected in {report.client_connected_secs:.3f}s") task = PipelineTask(pipeline, observers=[observer]) """ @@ -35,9 +37,7 @@ Example:: from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple, Type -from loguru import logger - -from pipecat.frames.frames import ClientConnectedFrame, StartFrame +from pipecat.frames.frames import BotConnectedFrame, ClientConnectedFrame, StartFrame from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed from pipecat.pipeline.base_pipeline import BasePipeline from pipecat.pipeline.pipeline import PipelineSink, PipelineSource @@ -74,14 +74,17 @@ 
class StartupTimingReport: @dataclass -class TransportReadinessReport: - """Time from pipeline start to first client connection. +class TransportTimingReport: + """Time from pipeline start to transport connection milestones. Parameters: - readiness_secs: Seconds from StartFrame to first ClientConnectedFrame. + bot_connected_secs: Seconds from StartFrame to first BotConnectedFrame + (only set for SFU transports). + client_connected_secs: Seconds from StartFrame to first ClientConnectedFrame. """ - readiness_secs: float + bot_connected_secs: Optional[float] = None + client_connected_secs: Optional[float] = None class StartupTimingObserver(BaseObserver): @@ -92,9 +95,13 @@ class StartupTimingObserver(BaseObserver): pushed downstream. This captures WebSocket connections, API authentication, model loading, and other initialization work. - Also measures transport readiness — the time from ``StartFrame`` to the - first ``ClientConnectedFrame`` — indicating how long it takes for a client - to connect after the pipeline starts. + Also measures transport timing, the time from ``StartFrame`` to connection + milestones: + + - ``bot_connected_secs``: When the bot joins the transport room + (SFU transports only, triggered by ``BotConnectedFrame``). + - ``client_connected_secs``: When a remote participant connects + (triggered by ``ClientConnectedFrame``). By default, internal pipeline processors (``PipelineSource``, ``PipelineSink``, ``Pipeline``) are excluded from the report. Pass ``processor_types`` to @@ -104,8 +111,9 @@ class StartupTimingObserver(BaseObserver): - on_startup_timing_report: Called once after startup completes with the full timing report. - - on_transport_readiness_measured: Called once when the first client connects with the - transport readiness timing. + - on_transport_timing_report: Called once when the first client connects with a + TransportTimingReport containing client_connected_secs and bot_connected_secs + (if available). 
Example:: @@ -118,9 +126,11 @@ class StartupTimingObserver(BaseObserver): for t in report.processor_timings: logger.info(f"{t.processor_name}: {t.duration_secs:.3f}s") - @observer.event_handler("on_transport_readiness_measured") - async def on_readiness(observer, report): - logger.info(f"Transport ready in {report.readiness_secs:.3f}s") + @observer.event_handler("on_transport_timing_report") + async def on_transport(observer, report): + if report.bot_connected_secs is not None: + logger.info(f"Bot connected in {report.bot_connected_secs:.3f}s") + logger.info(f"Client connected in {report.client_connected_secs:.3f}s") task = PipelineTask(pipeline, observers=[observer]) @@ -157,14 +167,17 @@ class StartupTimingObserver(BaseObserver): # Whether we've already emitted the startup timing report. self._startup_timing_reported = False - # Whether we've already measured transport readiness. - self._transport_readiness_measured = False + # Whether we've already measured transport timing. + self._transport_timing_reported = False # Timestamp (ns) when we first see a StartFrame arrive at a processor. self._start_frame_arrival_ns: Optional[int] = None + # Bot connected timing (stored for inclusion in the transport report). + self._bot_connected_secs: Optional[float] = None + self._register_event_handler("on_startup_timing_report") - self._register_event_handler("on_transport_readiness_measured") + self._register_event_handler("on_transport_timing_report") def _should_track(self, processor: FrameProcessor) -> bool: """Check if a processor should be tracked for timing. @@ -216,11 +229,16 @@ class StartupTimingObserver(BaseObserver): async def on_push_frame(self, data: FramePushed): """Record when a StartFrame leaves a processor and compute the delta. - Also handles ``ClientConnectedFrame`` to measure transport readiness. + Also handles ``BotConnectedFrame`` and ``ClientConnectedFrame`` to + measure transport timing. Args: data: The frame push event data. 
""" + if isinstance(data.frame, BotConnectedFrame): + self._handle_bot_connected(data) + return + if isinstance(data.frame, ClientConnectedFrame): await self._handle_client_connected(data) return @@ -249,16 +267,27 @@ class StartupTimingObserver(BaseObserver): ) ) - async def _handle_client_connected(self, data: FramePushed): - """Measure transport readiness on first client connection.""" - if self._transport_readiness_measured or self._start_frame_arrival_ns is None: + def _handle_bot_connected(self, data: FramePushed): + """Record bot connected timing on first BotConnectedFrame.""" + if self._bot_connected_secs is not None or self._start_frame_arrival_ns is None: return - self._transport_readiness_measured = True delta_ns = data.timestamp - self._start_frame_arrival_ns - readiness_secs = delta_ns / 1e9 - report = TransportReadinessReport(readiness_secs=readiness_secs) - await self._call_event_handler("on_transport_readiness_measured", report) + self._bot_connected_secs = delta_ns / 1e9 + + async def _handle_client_connected(self, data: FramePushed): + """Emit transport timing report on first ClientConnectedFrame.""" + if self._transport_timing_reported or self._start_frame_arrival_ns is None: + return + + self._transport_timing_reported = True + delta_ns = data.timestamp - self._start_frame_arrival_ns + client_connected_secs = delta_ns / 1e9 + report = TransportTimingReport( + bot_connected_secs=self._bot_connected_secs, + client_connected_secs=client_connected_secs, + ) + await self._call_event_handler("on_transport_timing_report", report) async def _emit_report(self): """Build and emit the startup timing report.""" diff --git a/src/pipecat/services/heygen/client.py b/src/pipecat/services/heygen/client.py index 4018d3858..6d45d6114 100644 --- a/src/pipecat/services/heygen/client.py +++ b/src/pipecat/services/heygen/client.py @@ -62,10 +62,12 @@ class HeyGenCallbacks(BaseModel): """Callback handlers for HeyGen events. 
Parameters: - on_participant_connected: Called when a participant connects - on_participant_disconnected: Called when a participant disconnects + on_connected: Called when the bot connects to the LiveKit room. + on_participant_connected: Called when a participant connects. + on_participant_disconnected: Called when a participant disconnects. """ + on_connected: Callable[[], Awaitable[None]] on_participant_connected: Callable[[str], Awaitable[None]] on_participant_disconnected: Callable[[str], Awaitable[None]] @@ -251,6 +253,7 @@ class HeyGenClient: logger.debug(f"HeyGenClient send_interval: {self._send_interval}") await self._ws_connect() await self._livekit_connect() + self._call_event_callback(self._callbacks.on_connected) async def stop(self) -> None: """Stop the client and terminate all connections. diff --git a/src/pipecat/services/heygen/video.py b/src/pipecat/services/heygen/video.py index b97f4a5ed..7f3624f35 100644 --- a/src/pipecat/services/heygen/video.py +++ b/src/pipecat/services/heygen/video.py @@ -128,6 +128,7 @@ class HeyGenVideoService(AIService): session_request=self._session_request, service_type=self._service_type, callbacks=HeyGenCallbacks( + on_connected=self._on_connected, on_participant_connected=self._on_participant_connected, on_participant_disconnected=self._on_participant_disconnected, ), @@ -144,6 +145,10 @@ class HeyGenVideoService(AIService): await self._client.cleanup() self._client = None + async def _on_connected(self): + """Handle bot connected to LiveKit room.""" + logger.info("HeyGen bot connected to LiveKit room") + async def _on_participant_connected(self, participant_id: str): """Handle participant connected events.""" logger.info(f"Participant connected {participant_id}") diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py index d9f259797..8c63ff354 100644 --- a/src/pipecat/services/tavus/video.py +++ b/src/pipecat/services/tavus/video.py @@ -94,6 +94,7 @@ class 
TavusVideoService(AIService): """ await super().setup(setup) callbacks = TavusCallbacks( + on_joined=self._on_joined, on_participant_joined=self._on_participant_joined, on_participant_left=self._on_participant_left, ) @@ -119,6 +120,10 @@ class TavusVideoService(AIService): await self._client.cleanup() self._client = None + async def _on_joined(self, data): + """Handle bot joined the Daily room.""" + logger.info("Tavus bot joined Daily room") + async def _on_participant_left(self, participant, reason): """Handle participant leaving the session.""" participant_id = participant["id"] diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index cb24b23fa..97aebe915 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -24,6 +24,7 @@ from pydantic import BaseModel from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams from pipecat.frames.frames import ( + BotConnectedFrame, CancelFrame, ClientConnectedFrame, DataFrame, @@ -2579,6 +2580,8 @@ class DailyTransport(BaseTransport): if error: await self._on_error(f"Unable to start transcription: {error}") await self._call_event_handler("on_joined", data) + if self._input: + await self._input.push_frame(BotConnectedFrame()) async def _on_left(self): """Handle room left events.""" diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py index 77ccda09f..d79d0080e 100644 --- a/src/pipecat/transports/heygen/transport.py +++ b/src/pipecat/transports/heygen/transport.py @@ -23,6 +23,7 @@ from loguru import logger from pipecat.frames.frames import ( AudioRawFrame, + BotConnectedFrame, BotStartedSpeakingFrame, BotStoppedSpeakingFrame, CancelFrame, @@ -340,6 +341,7 @@ class HeyGenTransport(BaseTransport): session_request=session_request, service_type=service_type, callbacks=HeyGenCallbacks( + on_connected=self._on_connected, on_participant_connected=self._on_participant_connected, 
on_participant_disconnected=self._on_participant_disconnected, ), @@ -350,9 +352,16 @@ class HeyGenTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. + self._register_event_handler("on_connected") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") + async def _on_connected(self): + """Handle bot connected to LiveKit room.""" + await self._call_event_handler("on_connected") + if self._input: + await self._input.push_frame(BotConnectedFrame()) + async def _on_participant_disconnected(self, participant_id: str): logger.debug(f"HeyGen participant {participant_id} disconnected") if participant_id != "heygen": diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py index e4435016c..7e9c1de35 100644 --- a/src/pipecat/transports/livekit/transport.py +++ b/src/pipecat/transports/livekit/transport.py @@ -23,6 +23,7 @@ from pipecat.audio.utils import create_stream_resampler from pipecat.audio.vad.vad_analyzer import VADAnalyzer from pipecat.frames.frames import ( AudioRawFrame, + BotConnectedFrame, CancelFrame, ClientConnectedFrame, EndFrame, @@ -1132,6 +1133,8 @@ class LiveKitTransport(BaseTransport): async def _on_connected(self): """Handle room connected events.""" await self._call_event_handler("on_connected") + if self._input: + await self._input.push_frame(BotConnectedFrame()) async def _on_disconnected(self): """Handle room disconnected events.""" diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index 114f33ca0..6db44d431 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -21,6 +21,7 @@ from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ( + BotConnectedFrame, CancelFrame, ClientConnectedFrame, EndFrame, @@ -133,10 +134,12 @@ class TavusCallbacks(BaseModel): 
"""Callback handlers for Tavus events. Parameters: + on_joined: Called when the bot joins the Daily room. on_participant_joined: Called when a participant joins the conversation. on_participant_left: Called when a participant leaves the conversation. """ + on_joined: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] @@ -271,6 +274,7 @@ class TavusTransportClient: async def _on_joined(self, data): """Handle joined event.""" logger.debug("TavusTransportClient joined!") + await self._callbacks.on_joined(data) async def _on_left(self): """Handle left event.""" @@ -703,6 +707,7 @@ class TavusTransport(BaseTransport): self._params = params callbacks = TavusCallbacks( + on_joined=self._on_joined, on_participant_joined=self._on_participant_joined, on_participant_left=self._on_participant_left, ) @@ -721,9 +726,16 @@ class TavusTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. 
+ self._register_event_handler("on_joined") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") + async def _on_joined(self, data): + """Handle bot joined room event.""" + await self._call_event_handler("on_joined", data) + if self._input: + await self._input.push_frame(BotConnectedFrame()) + async def _on_participant_left(self, participant, reason): """Handle participant left events.""" persona_name = await self._client.get_persona_name() diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py index efabf5bc7..3c89b9ca3 100644 --- a/tests/test_startup_timing_observer.py +++ b/tests/test_startup_timing_observer.py @@ -1,11 +1,17 @@ import asyncio import unittest -from pipecat.frames.frames import ClientConnectedFrame, Frame, StartFrame, TextFrame +from pipecat.frames.frames import ( + BotConnectedFrame, + ClientConnectedFrame, + Frame, + StartFrame, + TextFrame, +) from pipecat.observers.startup_timing_observer import ( StartupTimingObserver, StartupTimingReport, - TransportReadinessReport, + TransportTimingReport, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.tests.utils import run_test @@ -182,16 +188,16 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): f"Internal processor {t.processor_name} should be excluded by default", ) - async def test_transport_readiness_measured(self): - """Test that ClientConnectedFrame after startup emits on_transport_readiness_measured.""" + async def test_transport_timing_client_only(self): + """Test that ClientConnectedFrame emits on_transport_timing_report.""" observer = StartupTimingObserver() processor = FastProcessor() - readiness_reports = [] + transport_reports = [] - @observer.event_handler("on_transport_readiness_measured") - async def on_readiness(obs, report): - readiness_reports.append(report) + @observer.event_handler("on_transport_timing_report") + async def 
on_transport(obs, report): + transport_reports.append(report) frames_to_send = [ClientConnectedFrame(), TextFrame(text="hello")] @@ -202,21 +208,22 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): observers=[observer], ) - self.assertEqual(len(readiness_reports), 1) - report = readiness_reports[0] - self.assertIsInstance(report, TransportReadinessReport) - self.assertGreater(report.readiness_secs, 0) + self.assertEqual(len(transport_reports), 1) + report = transport_reports[0] + self.assertIsInstance(report, TransportTimingReport) + self.assertGreater(report.client_connected_secs, 0) + self.assertIsNone(report.bot_connected_secs) - async def test_transport_readiness_only_first(self): + async def test_transport_timing_only_first_client(self): """Test that only the first ClientConnectedFrame triggers the event.""" observer = StartupTimingObserver() processor = FastProcessor() - readiness_reports = [] + transport_reports = [] - @observer.event_handler("on_transport_readiness_measured") - async def on_readiness(obs, report): - readiness_reports.append(report) + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) frames_to_send = [ ClientConnectedFrame(), @@ -231,9 +238,9 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): observers=[observer], ) - self.assertEqual(len(readiness_reports), 1) + self.assertEqual(len(transport_reports), 1) - async def test_transport_readiness_without_start_frame(self): + async def test_transport_timing_without_start_frame(self): """Test that ClientConnectedFrame before StartFrame does not crash.""" observer = StartupTimingObserver() @@ -253,7 +260,74 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): await observer.on_push_frame(data) # No event should have been emitted. 
- self.assertFalse(observer._transport_readiness_measured) + self.assertFalse(observer._transport_timing_reported) + + async def test_bot_and_client_connected(self): + """Test that BotConnectedFrame timing is included in the transport report.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ + BotConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[BotConnectedFrame, ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(transport_reports), 1) + report = transport_reports[0] + self.assertGreater(report.client_connected_secs, 0) + self.assertIsNotNone(report.bot_connected_secs) + self.assertGreater(report.bot_connected_secs, 0) + + # Client connected should be >= bot connected. + self.assertGreaterEqual(report.client_connected_secs, report.bot_connected_secs) + + async def test_bot_connected_only_first(self): + """Test that only the first BotConnectedFrame is recorded.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ + BotConnectedFrame(), + BotConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[ + BotConnectedFrame, + BotConnectedFrame, + ClientConnectedFrame, + TextFrame, + ], + observers=[observer], + ) + + # Only one transport report, with bot timing from first frame. 
+ self.assertEqual(len(transport_reports), 1) + self.assertIsNotNone(transport_reports[0].bot_connected_secs) if __name__ == "__main__": From 75669b12a2a4a07c396932a0e67afa951733db2c Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 11:01:26 -0500 Subject: [PATCH 180/189] Convert observer data models to Pydantic BaseModel with timestamps Switch ProcessorStartupTiming, StartupTimingReport, and TransportTimingReport from dataclasses to Pydantic BaseModel. Add start_time (Unix timestamp) fields and wall clock conversion for monotonic observer timestamps. --- .../observers/startup_timing_observer.py | 37 +++++++++++++++---- tests/test_startup_timing_observer.py | 3 ++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index 555a10cb0..6dd574cdc 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -34,9 +34,11 @@ Example:: task = PipelineTask(pipeline, observers=[observer]) """ -from dataclasses import dataclass, field +import time from typing import Dict, List, Optional, Tuple, Type +from pydantic import BaseModel, Field + from pipecat.frames.frames import BotConnectedFrame, ClientConnectedFrame, StartFrame from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed from pipecat.pipeline.base_pipeline import BasePipeline @@ -47,42 +49,45 @@ from pipecat.processors.frame_processor import FrameProcessor _INTERNAL_TYPES = (PipelineSink, PipelineSource, BasePipeline) -@dataclass -class ProcessorStartupTiming: +class ProcessorStartupTiming(BaseModel): """Startup timing for a single processor. Parameters: processor_name: The name of the processor. + start_time: Unix timestamp when the processor's start() began. duration_secs: How long the processor's start() took, in seconds. 
""" processor_name: str + start_time: float duration_secs: float -@dataclass -class StartupTimingReport: +class StartupTimingReport(BaseModel): """Report of startup timings for all measured processors. Parameters: + start_time: Unix timestamp when the first processor began starting. total_duration_secs: Total wall-clock time from first to last processor start. processor_timings: Per-processor timing data, in pipeline order. """ + start_time: float total_duration_secs: float - processor_timings: List[ProcessorStartupTiming] = field(default_factory=list) + processor_timings: List[ProcessorStartupTiming] = Field(default_factory=list) -@dataclass -class TransportTimingReport: +class TransportTimingReport(BaseModel): """Time from pipeline start to transport connection milestones. Parameters: + start_time: Unix timestamp of the StartFrame (pipeline start). bot_connected_secs: Seconds from StartFrame to first BotConnectedFrame (only set for SFU transports). client_connected_secs: Seconds from StartFrame to first ClientConnectedFrame. """ + start_time: float bot_connected_secs: Optional[float] = None client_connected_secs: Optional[float] = None @@ -176,9 +181,19 @@ class StartupTimingObserver(BaseObserver): # Bot connected timing (stored for inclusion in the transport report). self._bot_connected_secs: Optional[float] = None + # Wall clock reference for converting monotonic ns to Unix timestamps. 
+ self._wall_clock_ref: Optional[float] = None + self._mono_clock_ref_ns: Optional[int] = None + self._register_event_handler("on_startup_timing_report") self._register_event_handler("on_transport_timing_report") + def _mono_to_wall(self, mono_ns: int) -> float: + """Convert a monotonic nanosecond timestamp to a Unix wall clock time.""" + if self._wall_clock_ref is None or self._mono_clock_ref_ns is None: + return 0.0 + return self._wall_clock_ref + (mono_ns - self._mono_clock_ref_ns) / 1e9 + def _should_track(self, processor: FrameProcessor) -> bool: """Check if a processor should be tracked for timing. @@ -212,6 +227,8 @@ class StartupTimingObserver(BaseObserver): if self._start_frame_id is None: self._start_frame_id = data.frame.id self._start_frame_arrival_ns = data.timestamp + self._wall_clock_ref = time.time() + self._mono_clock_ref_ns = data.timestamp elif data.frame.id != self._start_frame_id: return @@ -263,6 +280,7 @@ class StartupTimingObserver(BaseObserver): self._timings.append( ProcessorStartupTiming( processor_name=processor.name, + start_time=self._mono_to_wall(arrival_ts), duration_secs=duration_secs, ) ) @@ -284,6 +302,7 @@ class StartupTimingObserver(BaseObserver): delta_ns = data.timestamp - self._start_frame_arrival_ns client_connected_secs = delta_ns / 1e9 report = TransportTimingReport( + start_time=self._mono_to_wall(self._start_frame_arrival_ns), bot_connected_secs=self._bot_connected_secs, client_connected_secs=client_connected_secs, ) @@ -296,8 +315,10 @@ class StartupTimingObserver(BaseObserver): self._startup_timing_reported = True total = sum(t.duration_secs for t in self._timings) + start_time = self._timings[0].start_time if self._timings else 0.0 report = StartupTimingReport( + start_time=start_time, total_duration_secs=total, processor_timings=self._timings, ) diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py index 3c89b9ca3..2bc246754 100644 --- a/tests/test_startup_timing_observer.py +++ 
b/tests/test_startup_timing_observer.py @@ -151,9 +151,11 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): report = reports[0] self.assertIsInstance(report, StartupTimingReport) self.assertIsInstance(report.total_duration_secs, float) + self.assertGreater(report.start_time, 0) for timing in report.processor_timings: self.assertIsInstance(timing.processor_name, str) self.assertIsInstance(timing.duration_secs, float) + self.assertGreater(timing.start_time, 0) async def test_excludes_internal_processors(self): """Test that internal pipeline processors are excluded by default.""" @@ -211,6 +213,7 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): self.assertEqual(len(transport_reports), 1) report = transport_reports[0] self.assertIsInstance(report, TransportTimingReport) + self.assertGreater(report.start_time, 0) self.assertGreater(report.client_connected_secs, 0) self.assertIsNone(report.bot_connected_secs) From 193f93c2cec5edd91e30a93e04181c86eaabad83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 2 Mar 2026 10:16:27 -0800 Subject: [PATCH 181/189] Update Nvidia example to use llama-3.3-70b-instruct model --- examples/foundational/07r-interruptible-nvidia.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/foundational/07r-interruptible-nvidia.py b/examples/foundational/07r-interruptible-nvidia.py index 18e0b5d5f..d3e34c61f 100644 --- a/examples/foundational/07r-interruptible-nvidia.py +++ b/examples/foundational/07r-interruptible-nvidia.py @@ -55,7 +55,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = NvidiaSTTService(api_key=os.getenv("NVIDIA_API_KEY")) llm = NvidiaLLMService( - api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct" + api_key=os.getenv("NVIDIA_API_KEY"), + model="meta/llama-3.3-70b-instruct", ) tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY")) From 
bbbfdfd32143940726662ca729d5d2a79637286c Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 14:07:34 -0500 Subject: [PATCH 182/189] Replace per-processor start_time with start_offset_secs Use start_offset_secs (offset from StartFrame) on ProcessorStartupTiming instead of a wall-clock timestamp. Reports keep a single start_time anchor for dashboard visualization. Remove _mono_to_wall conversion. --- .../observers/startup_timing_observer.py | 28 ++++++++----------- tests/test_startup_timing_observer.py | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index 6dd574cdc..8233ed2b8 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -54,12 +54,13 @@ class ProcessorStartupTiming(BaseModel): Parameters: processor_name: The name of the processor. - start_time: Unix timestamp when the processor's start() began. + start_offset_secs: Offset in seconds from the StartFrame to when this + processor's start() began. duration_secs: How long the processor's start() took, in seconds. """ processor_name: str - start_time: float + start_offset_secs: float duration_secs: float @@ -181,19 +182,12 @@ class StartupTimingObserver(BaseObserver): # Bot connected timing (stored for inclusion in the transport report). self._bot_connected_secs: Optional[float] = None - # Wall clock reference for converting monotonic ns to Unix timestamps. - self._wall_clock_ref: Optional[float] = None - self._mono_clock_ref_ns: Optional[int] = None + # Wall clock time when the StartFrame was first seen. 
+ self._start_wall_clock: Optional[float] = None self._register_event_handler("on_startup_timing_report") self._register_event_handler("on_transport_timing_report") - def _mono_to_wall(self, mono_ns: int) -> float: - """Convert a monotonic nanosecond timestamp to a Unix wall clock time.""" - if self._wall_clock_ref is None or self._mono_clock_ref_ns is None: - return 0.0 - return self._wall_clock_ref + (mono_ns - self._mono_clock_ref_ns) / 1e9 - def _should_track(self, processor: FrameProcessor) -> bool: """Check if a processor should be tracked for timing. @@ -227,8 +221,7 @@ class StartupTimingObserver(BaseObserver): if self._start_frame_id is None: self._start_frame_id = data.frame.id self._start_frame_arrival_ns = data.timestamp - self._wall_clock_ref = time.time() - self._mono_clock_ref_ns = data.timestamp + self._start_wall_clock = time.time() elif data.frame.id != self._start_frame_id: return @@ -277,10 +270,12 @@ class StartupTimingObserver(BaseObserver): duration_ns = data.timestamp - arrival_ts duration_secs = duration_ns / 1e9 + start_offset_secs = (arrival_ts - self._start_frame_arrival_ns) / 1e9 + self._timings.append( ProcessorStartupTiming( processor_name=processor.name, - start_time=self._mono_to_wall(arrival_ts), + start_offset_secs=start_offset_secs, duration_secs=duration_secs, ) ) @@ -302,7 +297,7 @@ class StartupTimingObserver(BaseObserver): delta_ns = data.timestamp - self._start_frame_arrival_ns client_connected_secs = delta_ns / 1e9 report = TransportTimingReport( - start_time=self._mono_to_wall(self._start_frame_arrival_ns), + start_time=self._start_wall_clock or 0.0, bot_connected_secs=self._bot_connected_secs, client_connected_secs=client_connected_secs, ) @@ -315,10 +310,9 @@ class StartupTimingObserver(BaseObserver): self._startup_timing_reported = True total = sum(t.duration_secs for t in self._timings) - start_time = self._timings[0].start_time if self._timings else 0.0 report = StartupTimingReport( - start_time=start_time, + 
start_time=self._start_wall_clock or 0.0, total_duration_secs=total, processor_timings=self._timings, ) diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py index 2bc246754..6355c6081 100644 --- a/tests/test_startup_timing_observer.py +++ b/tests/test_startup_timing_observer.py @@ -155,7 +155,7 @@ class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): for timing in report.processor_timings: self.assertIsInstance(timing.processor_name, str) self.assertIsInstance(timing.duration_secs, float) - self.assertGreater(timing.start_time, 0) + self.assertGreaterEqual(timing.start_offset_secs, 0) async def test_excludes_internal_processors(self): """Test that internal pipeline processors are excluded by default.""" From 0cfd953a900f388a09ca40cedf3f49775f6d1cae Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 14:15:41 -0500 Subject: [PATCH 183/189] Use _ArrivalInfo dataclass instead of tuple for arrival tracking --- .../observers/startup_timing_observer.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index 8233ed2b8..d4a010d33 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -35,6 +35,7 @@ Example:: """ import time +from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Type from pydantic import BaseModel, Field @@ -49,6 +50,14 @@ from pipecat.processors.frame_processor import FrameProcessor _INTERNAL_TYPES = (PipelineSink, PipelineSource, BasePipeline) +@dataclass +class _ArrivalInfo: + """Internal record of when a StartFrame arrived at a processor.""" + + processor: FrameProcessor + arrival_ts_ns: int + + class ProcessorStartupTiming(BaseModel): """Startup timing for a single processor. 
@@ -161,8 +170,8 @@ class StartupTimingObserver(BaseObserver): super().__init__(**kwargs) self._processor_types = processor_types - # Map processor ID -> (processor, arrival_timestamp_ns) - self._arrivals: Dict[int, Tuple[FrameProcessor, int]] = {} + # Map processor ID -> arrival info. + self._arrivals: Dict[int, _ArrivalInfo] = {} # Collected timings in pipeline order. self._timings: List[ProcessorStartupTiming] = [] @@ -234,7 +243,9 @@ class StartupTimingObserver(BaseObserver): return if self._should_track(data.processor): - self._arrivals[data.processor.id] = (data.processor, data.timestamp) + self._arrivals[data.processor.id] = _ArrivalInfo( + processor=data.processor, arrival_ts_ns=data.timestamp + ) async def on_push_frame(self, data: FramePushed): """Record when a StartFrame leaves a processor and compute the delta. @@ -266,15 +277,13 @@ class StartupTimingObserver(BaseObserver): if arrival is None: return - processor, arrival_ts = arrival - duration_ns = data.timestamp - arrival_ts + duration_ns = data.timestamp - arrival.arrival_ts_ns duration_secs = duration_ns / 1e9 - - start_offset_secs = (arrival_ts - self._start_frame_arrival_ns) / 1e9 + start_offset_secs = (arrival.arrival_ts_ns - self._start_frame_arrival_ns) / 1e9 self._timings.append( ProcessorStartupTiming( - processor_name=processor.name, + processor_name=arrival.processor.name, start_offset_secs=start_offset_secs, duration_secs=duration_secs, ) From 389d0c3fb6adbbf7d926a3e3a6fea49a8aaae3d0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 14:33:55 -0500 Subject: [PATCH 184/189] Use on_pipeline_started from PipelineTask for startup report Replace the PipelineSink detection in StartupTimingObserver with an on_pipeline_started() callback from PipelineTask via TaskObserver. This fixes premature report emission when using ParallelPipeline, which has its own inner PipelineSinks per branch. 
--- src/pipecat/observers/base_observer.py | 8 +++++ .../observers/startup_timing_observer.py | 31 +++++++++---------- src/pipecat/pipeline/task.py | 1 + src/pipecat/pipeline/task_observer.py | 14 ++++++++- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/pipecat/observers/base_observer.py b/src/pipecat/observers/base_observer.py index 78e36fec8..70c79224a 100644 --- a/src/pipecat/observers/base_observer.py +++ b/src/pipecat/observers/base_observer.py @@ -100,3 +100,11 @@ class BaseObserver(BaseObject): data: The event data containing details about the frame transfer. """ pass + + async def on_pipeline_started(self): + """Called when the pipeline has fully started. + + Fired after the ``StartFrame`` has been processed by all processors + in the pipeline, including nested ``ParallelPipeline`` branches. + """ + pass diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index d4a010d33..a1ea04d47 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -43,11 +43,11 @@ from pydantic import BaseModel, Field from pipecat.frames.frames import BotConnectedFrame, ClientConnectedFrame, StartFrame from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed from pipecat.pipeline.base_pipeline import BasePipeline -from pipecat.pipeline.pipeline import PipelineSink, PipelineSource +from pipecat.pipeline.pipeline import PipelineSource from pipecat.processors.frame_processor import FrameProcessor # Internal pipeline types excluded from tracking by default. -_INTERNAL_TYPES = (PipelineSink, PipelineSource, BasePipeline) +_INTERNAL_TYPES = (PipelineSource, BasePipeline) @dataclass @@ -118,9 +118,9 @@ class StartupTimingObserver(BaseObserver): - ``client_connected_secs``: When a remote participant connects (triggered by ``ClientConnectedFrame``). 
- By default, internal pipeline processors (``PipelineSource``, ``PipelineSink``, - ``Pipeline``) are excluded from the report. Pass ``processor_types`` to - measure only specific types. + By default, internal pipeline processors (``PipelineSource``, ``Pipeline``) + are excluded from the report. Pass ``processor_types`` to measure only + specific types. Event handlers available: @@ -211,12 +211,19 @@ class StartupTimingObserver(BaseObserver): # Default: exclude internal pipeline plumbing. return not isinstance(processor, _INTERNAL_TYPES) + async def on_pipeline_started(self): + """Emit the startup timing report when the pipeline has fully started. + + Called by the ``PipelineTask`` after the ``StartFrame`` has been + processed by all processors, including nested ``ParallelPipeline`` + branches. + """ + if self._timings: + await self._emit_report() + async def on_process_frame(self, data: FrameProcessed): """Record when a StartFrame arrives at a processor. - When a ``StartFrame`` reaches a ``PipelineSink``, startup is complete - (the frame has traversed the entire pipeline) and the report is emitted. - Args: data: The frame processing event data. """ @@ -234,14 +241,6 @@ class StartupTimingObserver(BaseObserver): elif data.frame.id != self._start_frame_id: return - # When the StartFrame reaches a PipelineSink, all processors have - # completed start(). PipelineSinks use direct mode so the outermost - # sink fires last within the same synchronous call chain. 
- if isinstance(data.processor, PipelineSink): - if self._timings: - await self._emit_report() - return - if self._should_track(data.processor): self._arrivals[data.processor.id] = _ArrivalInfo( processor=data.processor, arrival_ts_ns=data.timestamp diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index deae6290c..906d55eb6 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -915,6 +915,7 @@ class PipelineTask(BasePipelineTask): if isinstance(frame, StartFrame): await self._call_event_handler("on_pipeline_started", frame) + await self._observer.on_pipeline_started() # Start heartbeat tasks now that StartFrame has been processed # by all processors in the pipeline diff --git a/src/pipecat/pipeline/task_observer.py b/src/pipecat/pipeline/task_observer.py index 4d33fd60e..dc2040e07 100644 --- a/src/pipecat/pipeline/task_observer.py +++ b/src/pipecat/pipeline/task_observer.py @@ -39,6 +39,12 @@ class Proxy: observer: BaseObserver +class _PipelineStartedSignal: + """Internal sentinel queued to observers when the pipeline has started.""" + + pass + + class TaskObserver(BaseObserver): """Proxy observer that manages multiple observers without blocking the pipeline. @@ -129,6 +135,10 @@ class TaskObserver(BaseObserver): for proxy in self._proxies: await proxy.cleanup() + async def on_pipeline_started(self): + """Forward pipeline started signal to all managed observers.""" + await self._send_to_proxy(_PipelineStartedSignal()) + async def on_process_frame(self, data: FrameProcessed): """Queue frame data for all managed observers. 
@@ -186,7 +196,9 @@ class TaskObserver(BaseObserver): while True: data = await queue.get() - if isinstance(data, FramePushed): + if isinstance(data, _PipelineStartedSignal): + await observer.on_pipeline_started() + elif isinstance(data, FramePushed): if on_push_frame_deprecated: await observer.on_push_frame( data.source, data.destination, data.frame, data.direction, data.timestamp From c1743dcffd16e05785c7c68317981280b299de94 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 15:22:44 -0500 Subject: [PATCH 185/189] Rename Tavus event, on_connected --- src/pipecat/transports/tavus/transport.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index 6db44d431..cb6844250 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -134,12 +134,12 @@ class TavusCallbacks(BaseModel): """Callback handlers for Tavus events. Parameters: - on_joined: Called when the bot joins the Daily room. + on_connected: Called when the bot connects to the room. on_participant_joined: Called when a participant joins the conversation. on_participant_left: Called when a participant leaves the conversation. 
""" - on_joined: Callable[[Mapping[str, Any]], Awaitable[None]] + on_connected: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] @@ -274,7 +274,7 @@ class TavusTransportClient: async def _on_joined(self, data): """Handle joined event.""" logger.debug("TavusTransportClient joined!") - await self._callbacks.on_joined(data) + await self._callbacks.on_connected(data) async def _on_left(self): """Handle left event.""" @@ -669,6 +669,7 @@ class TavusTransport(BaseTransport): Event handlers available: + - on_connected(transport, data): Bot connected to the room - on_client_connected(transport, participant): Participant connected to the session - on_client_disconnected(transport, participant): Participant disconnected from the session @@ -707,7 +708,7 @@ class TavusTransport(BaseTransport): self._params = params callbacks = TavusCallbacks( - on_joined=self._on_joined, + on_connected=self._on_joined, on_participant_joined=self._on_participant_joined, on_participant_left=self._on_participant_left, ) @@ -726,13 +727,13 @@ class TavusTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. 
- self._register_event_handler("on_joined") + self._register_event_handler("on_connected") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") async def _on_joined(self, data): """Handle bot joined room event.""" - await self._call_event_handler("on_joined", data) + await self._call_event_handler("on_connected", data) if self._input: await self._input.push_frame(BotConnectedFrame()) From dbdb54ce0f306a38fb0d49f0d8146d5594f9cd39 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 15:44:37 -0500 Subject: [PATCH 186/189] Add on_connected event handler to DailyTransport for cross-transport consistency --- src/pipecat/transports/daily/transport.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index 97aebe915..dc9868426 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -2072,6 +2072,8 @@ class DailyTransport(BaseTransport): Event handlers available: - on_joined: Called when the bot joins the room. Args: (data: dict) + - on_connected: Called when the bot connects to the room (alias for + on_joined). Args: (data: dict) - on_left: Called when the bot leaves the room. - on_before_leave: [sync] Called just before the bot leaves the room. - on_error: Called when a transport error occurs. Args: (error: str) @@ -2189,6 +2191,7 @@ class DailyTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. 
self._register_event_handler("on_active_speaker_changed") + self._register_event_handler("on_connected") self._register_event_handler("on_joined") self._register_event_handler("on_left") self._register_event_handler("on_error") @@ -2580,6 +2583,8 @@ class DailyTransport(BaseTransport): if error: await self._on_error(f"Unable to start transcription: {error}") await self._call_event_handler("on_joined", data) + # Also call on_connected for compatibility with other transports + await self._call_event_handler("on_connected", data) if self._input: await self._input.push_frame(BotConnectedFrame()) From d0ecb3c7a8bee801878015f99000d8d8a68403fc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Mon, 2 Mar 2026 16:24:21 -0500 Subject: [PATCH 187/189] Revert "Deprecate processing metrics (ProcessingMetricsData)" (#3852) This reverts commit 127b52bad5309d5e6b3df90a7100f69b49f81c55. --- changelog/3852.deprecated.md | 1 - src/pipecat/metrics/metrics.py | 4 ---- src/pipecat/processors/frame_processor.py | 16 ---------------- .../metrics/frame_processor_metrics.py | 8 -------- 4 files changed, 29 deletions(-) delete mode 100644 changelog/3852.deprecated.md diff --git a/changelog/3852.deprecated.md b/changelog/3852.deprecated.md deleted file mode 100644 index 666c7c58a..000000000 --- a/changelog/3852.deprecated.md +++ /dev/null @@ -1 +0,0 @@ -- Deprecated `ProcessingMetricsData` and `start_processing_metrics()`/`stop_processing_metrics()` on `FrameProcessor` and `FrameProcessorMetrics`. These metrics don't accurately depict a service's performance. Instead, TTFB metrics are recommended. Processing metrics will be removed in the 1.0.0 version. diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index 37ab99447..2030306e5 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -41,10 +41,6 @@ class TTFBMetricsData(MetricsData): class ProcessingMetricsData(MetricsData): """General processing time metrics data. - .. 
deprecated:: 0.0.104 - Processing metrics are deprecated and will be removed in a future version. - Use TTFB metrics instead. - Parameters: value: Processing time measurement in seconds. """ diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 3e7b48442..bfe818696 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -441,35 +441,19 @@ class FrameProcessor(BaseObject): if frame: await self.push_frame(frame) - _processing_metrics_warned = False - async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start processing metrics collection. - .. deprecated:: 0.0.104 - Processing metrics are deprecated and will be removed in a future version. - Use TTFB metrics instead. - Args: start_time: Optional timestamp to use as the start time. If None, uses the current time. """ if self.can_generate_metrics() and self.metrics_enabled: - if not FrameProcessor._processing_metrics_warned: - FrameProcessor._processing_metrics_warned = True - logger.warning( - "Processing metrics are deprecated and will be removed in a future version. " - "Use TTFB metrics instead." - ) await self._metrics.start_processing_metrics(start_time=start_time) async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop processing metrics collection and push results. - .. deprecated:: 0.0.104 - Processing metrics are deprecated and will be removed in a future version. - Use TTFB metrics instead. - Args: end_time: Optional timestamp to use as the end time. If None, uses the current time. 
diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index ef637b5ad..7a52895a2 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -150,10 +150,6 @@ class FrameProcessorMetrics(BaseObject): async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start measuring processing time. - .. deprecated:: 0.0.104 - Processing metrics are deprecated and will be removed in a future version. - Use TTFB metrics instead. - Args: start_time: Optional timestamp to use as the start time. If None, uses the current time. @@ -163,10 +159,6 @@ class FrameProcessorMetrics(BaseObject): async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop processing time measurement and generate metrics frame. - .. deprecated:: 0.0.104 - Processing metrics are deprecated and will be removed in a future version. - Use TTFB metrics instead. - Args: end_time: Optional timestamp to use as the end time. If None, uses the current time. From 4a61d5bfadc55651e921be8bb1ac40affbacd929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 2 Mar 2026 12:04:51 -0800 Subject: [PATCH 188/189] Add broadcast_interruption() to FrameProcessor Replace the round-trip push_interruption_task_frame_and_wait() mechanism with broadcast_interruption(), which pushes an InterruptionFrame both upstream and downstream directly from the calling processor. This eliminates race conditions (transcription arriving before the InterruptionFrame comes back), swallowed-event timeouts (frame blocked before reaching the sink), and the complexity of _wait_for_interruption flag / queue bypass / frame.complete() obligations. 
- Add broadcast_interruption() to FrameProcessor - Deprecate push_interruption_task_frame_and_wait() (delegates to new method) - Remove event field and complete() from InterruptionFrame/InterruptionTaskFrame - Remove _wait_for_interruption flag and all special-case logic - Remove frame.complete() calls in stt_mute_filter and llm_response_universal - Update all 17 call sites to use broadcast_interruption() - Update tests --- changelog/3900.added.md | 1 + changelog/3900.changed.md | 1 + changelog/3900.deprecated.md | 1 + .../voicemail/voicemail_detector.py | 2 +- src/pipecat/frames/frames.py | 29 +---- src/pipecat/pipeline/task.py | 4 +- .../processors/aggregators/dtmf_aggregator.py | 2 +- .../processors/aggregators/llm_response.py | 2 +- .../aggregators/llm_response_universal.py | 8 +- .../processors/filters/stt_mute_filter.py | 6 - src/pipecat/processors/frame_processor.py | 77 ++++-------- src/pipecat/processors/frameworks/rtvi.py | 2 +- src/pipecat/services/deepgram/flux/stt.py | 2 +- src/pipecat/services/deepgram/stt.py | 2 +- src/pipecat/services/gladia/stt.py | 2 +- .../services/google/gemini_live/llm.py | 2 +- src/pipecat/services/grok/realtime/llm.py | 2 +- src/pipecat/services/openai/realtime/llm.py | 2 +- src/pipecat/services/openai/stt.py | 2 +- .../services/openai_realtime_beta/openai.py | 2 +- src/pipecat/services/sarvam/stt.py | 2 +- src/pipecat/services/speechmatics/stt.py | 2 +- src/pipecat/transports/base_input.py | 2 +- src/pipecat/turns/user_turn_processor.py | 2 +- tests/test_context_aggregators.py | 3 +- tests/test_frame_processor.py | 117 +++--------------- tests/test_stt_mute_filter.py | 13 +- 27 files changed, 68 insertions(+), 224 deletions(-) create mode 100644 changelog/3900.added.md create mode 100644 changelog/3900.changed.md create mode 100644 changelog/3900.deprecated.md diff --git a/changelog/3900.added.md b/changelog/3900.added.md new file mode 100644 index 000000000..08921c004 --- /dev/null +++ b/changelog/3900.added.md @@ -0,0 +1 
@@ +- Added `broadcast_interruption()` to `FrameProcessor`. This method pushes an `InterruptionFrame` both upstream and downstream directly from the calling processor, avoiding the round-trip through the pipeline task that `push_interruption_task_frame_and_wait()` required. diff --git a/changelog/3900.changed.md b/changelog/3900.changed.md new file mode 100644 index 000000000..59b4cdb95 --- /dev/null +++ b/changelog/3900.changed.md @@ -0,0 +1 @@ +- Removed `event` field and `complete()` method from `InterruptionFrame`. Removed `event` field from `InterruptionTaskFrame`. These are no longer needed since `broadcast_interruption()` does not require a round-trip completion signal. diff --git a/changelog/3900.deprecated.md b/changelog/3900.deprecated.md new file mode 100644 index 000000000..421e10e92 --- /dev/null +++ b/changelog/3900.deprecated.md @@ -0,0 +1 @@ +- Deprecated `push_interruption_task_frame_and_wait()` in `FrameProcessor`. Use `broadcast_interruption()` instead. The old method now delegates to `broadcast_interruption()` and logs a deprecation warning. 
diff --git a/src/pipecat/extensions/voicemail/voicemail_detector.py b/src/pipecat/extensions/voicemail/voicemail_detector.py index 7e22e535a..470f5dd54 100644 --- a/src/pipecat/extensions/voicemail/voicemail_detector.py +++ b/src/pipecat/extensions/voicemail/voicemail_detector.py @@ -368,7 +368,7 @@ class ClassificationProcessor(FrameProcessor): await self._voicemail_notifier.notify() # Clear buffered TTS frames # Interrupt the current pipeline to stop any ongoing processing - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() # Set the voicemail event to trigger the voicemail handler self._voicemail_event.clear() diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 126f3c001..9d6f78d6c 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -11,7 +11,6 @@ including data frames, system frames, and control frames for audio, video, text, and LLM processing. """ -import asyncio import time from dataclasses import dataclass, field from typing import ( @@ -1141,24 +1140,9 @@ class InterruptionFrame(SystemFrame): This frame is used to interrupt the pipeline. For example, when a user starts speaking to cancel any in-progress bot output. It can also be pushed by any processor. - - Parameters: - event: Optional event set when the frame has fully traversed the - pipeline. - """ - event: Optional[asyncio.Event] = None - - def complete(self): - """Signal that this interruption has been fully processed. - - Called automatically when the frame reaches the pipeline sink, or - manually when the frame is consumed before reaching it (e.g. when - the user is muted). - """ - if self.event: - self.event.set() + pass @dataclass @@ -1825,16 +1809,11 @@ class InterruptionTaskFrame(TaskFrame): """Frame indicating the pipeline should be interrupted. This frame should be pushed upstream to indicate the pipeline should be - interrupted. 
The pipeline task converts this into an `InterruptionFrame` and - sends it downstream. The `event` is passed to the `InterruptionFrame` so it - can signal when the interruption has fully traversed the pipeline. - - Parameters: - event: Optional event passed to the corresponding `InterruptionFrame`. - + interrupted. The pipeline task converts this into an `InterruptionFrame` + and sends it downstream. """ - event: Optional[asyncio.Event] = None + pass @dataclass diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index deae6290c..291ed5506 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -892,7 +892,7 @@ class PipelineTask(BasePipelineTask): # pipeline. This is in case the push task is blocked waiting for a # pipeline-ending frame to finish traversing the pipeline. logger.debug(f"{self}: received interruption task frame {frame}") - await self._pipeline.queue_frame(InterruptionFrame(event=frame.event)) + await self._pipeline.queue_frame(InterruptionFrame()) elif isinstance(frame, ErrorFrame): await self._call_event_handler("on_pipeline_error", frame) if frame.fatal: @@ -931,8 +931,6 @@ class PipelineTask(BasePipelineTask): self._pipeline_end_event.set() elif isinstance(frame, CancelFrame): self._pipeline_end_event.set() - elif isinstance(frame, InterruptionFrame): - frame.complete() elif isinstance(frame, HeartbeatFrame): await self._heartbeat_queue.put(frame) diff --git a/src/pipecat/processors/aggregators/dtmf_aggregator.py b/src/pipecat/processors/aggregators/dtmf_aggregator.py index 1b9c59158..ea56ba6fc 100644 --- a/src/pipecat/processors/aggregators/dtmf_aggregator.py +++ b/src/pipecat/processors/aggregators/dtmf_aggregator.py @@ -104,7 +104,7 @@ class DTMFAggregator(FrameProcessor): # For first digit, schedule interruption. 
if is_first_digit: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() # Check for immediate flush conditions if frame.button == self._termination_digit: diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index 44e5ce252..7c246b209 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -581,7 +581,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator): logger.debug( "Interruption conditions met - pushing interruption and aggregation" ) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._process_aggregation() else: logger.debug("Interruption conditions not met - not pushing aggregation") diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 96f3702be..cf6c81e5f 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -608,12 +608,6 @@ class LLMUserAggregator(LLMContextAggregator): if should_mute_frame: logger.trace(f"{frame.name} suppressed - user currently muted") - # When muted, the InterruptionFrame won't propagate further and - # will never reach the pipeline sink. Complete it here so - # push_interruption_task_frame_and_wait() doesn't hang. 
- if should_mute_frame and isinstance(frame, InterruptionFrame): - frame.complete() - should_mute_next_time = False for s in self._params.user_mute_strategies: should_mute_next_time |= await s.process_frame(frame) @@ -737,7 +731,7 @@ class LLMUserAggregator(LLMContextAggregator): await self._user_idle_controller.process_frame(UserStartedSpeakingFrame()) if params.enable_interruptions and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._call_event_handler("on_user_turn_started", strategy) diff --git a/src/pipecat/processors/filters/stt_mute_filter.py b/src/pipecat/processors/filters/stt_mute_filter.py index f5d008e28..9f522a20d 100644 --- a/src/pipecat/processors/filters/stt_mute_filter.py +++ b/src/pipecat/processors/filters/stt_mute_filter.py @@ -234,12 +234,6 @@ class STTMuteFilter(FrameProcessor): await self.push_frame(frame, direction) else: logger.trace(f"{frame.__class__.__name__} suppressed - STT currently muted") - - # When muted, the InterruptionFrame won't propagate further - # and will never reach the pipeline sink. Complete it here so - # push_interruption_task_frame_and_wait() doesn't hang. 
- if isinstance(frame, InterruptionFrame): - frame.complete() else: # Pass all other frames through await self.push_frame(frame, direction) diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 3e7b48442..69c503e71 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -41,7 +41,6 @@ from pipecat.frames.frames import ( FrameProcessorResumeFrame, FrameProcessorResumeUrgentFrame, InterruptionFrame, - InterruptionTaskFrame, StartFrame, SystemFrame, UninterruptibleFrame, @@ -240,10 +239,6 @@ class FrameProcessor(BaseObject): self.__process_frame_task: Optional[asyncio.Task] = None self.__process_current_frame: Optional[Frame] = None - # Set while awaiting push_interruption_task_frame_and_wait() so that - # _start_interruption() knows not to cancel the process task. - self._wait_for_interruption = False - # Frame processor events. self._register_event_handler("on_before_process_frame", sync=True) self._register_event_handler("on_after_process_frame", sync=True) @@ -329,7 +324,7 @@ class FrameProcessor(BaseObject): warnings.simplefilter("always") warnings.warn( "`FrameProcessor.interruptions_allowed` is deprecated. " - "Use `LLMUserAggregator`'s new `user_mute_strategies` parameter instead.", + "Use `LLMUserAggregator`'s new `user_mute_strategies` parameter instead.", DeprecationWarning, stacklevel=2, ) @@ -647,15 +642,6 @@ class FrameProcessor(BaseObject): if self._cancelling: return - # If we are waiting for an interruption, bypass all queued system frames - # and process the frame right away. This is because a previous system - # frame might be waiting for the interruption frame blocking the input - # task, so this InterruptionFrame would never be dequeued and we'd - # deadlock. 
- if self._wait_for_interruption and isinstance(frame, InterruptionFrame): - await self.__process_frame(frame, direction, callback) - return - if self._enable_direct_mode: await self.__process_frame(frame, direction, callback) else: @@ -790,43 +776,32 @@ class FrameProcessor(BaseObject): await self._call_event_handler("on_after_push_frame", frame) + async def broadcast_interruption(self): + """Broadcast an `InterruptionFrame` both upstream and downstream.""" + logger.debug(f"{self}: broadcasting interruption") + self.__reset_process_task() + await self.stop_all_metrics() + await self.broadcast_frame(InterruptionFrame) + async def push_interruption_task_frame_and_wait(self, *, timeout: float = 5.0): """Push an interruption task frame upstream and wait for the interruption. - This function sends an `InterruptionTaskFrame` upstream to the - pipeline task. The task creates a corresponding `InterruptionFrame` - and sends it downstream through the pipeline. An `asyncio.Event` is - attached to both frames so the caller can wait until the interruption - has fully traversed the pipeline. The event is set when the - `InterruptionFrame` reaches the pipeline sink. If the frame does - not complete within the given timeout, a warning is logged and the - event is forcibly set so the caller is unblocked. - - Args: - timeout: Maximum seconds to wait for the interruption to complete. + .. deprecated:: 0.0.104 + Use :meth:`broadcast_interruption` instead. This method now + delegates to ``broadcast_interruption()`` and ignores *timeout*. """ - self._wait_for_interruption = True + import warnings - event = asyncio.Event() + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "`FrameProcessor.push_interruption_task_frame_and_wait()` is deprecated. 
" + "Use `FrameProcessor.broadcast_interruption()` instead.", + DeprecationWarning, + stacklevel=2, + ) - await self.push_frame(InterruptionTaskFrame(event=event), FrameDirection.UPSTREAM) - - # Wait for the `InterruptionFrame` to complete and log a warning if it - # takes too long. If it does take too long make sure we unblock it, - # otherwise we will hang here forever. - while not event.is_set(): - try: - await asyncio.wait_for(event.wait(), timeout=timeout) - except asyncio.TimeoutError: - logger.warning( - f"{self}: InterruptionFrame has not completed after" - f" {timeout}s. Make sure InterruptionFrame.complete()" - " is being called (e.g. if the frame is being blocked" - " or consumed before reaching the pipeline sink)." - ) - event.set() - - self._wait_for_interruption = False + await self.broadcast_interruption() async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): """Broadcasts a frame of the specified class upstream and downstream. @@ -933,15 +908,7 @@ class FrameProcessor(BaseObject): async def _start_interruption(self): """Start handling an interruption by cancelling current tasks.""" try: - if self._wait_for_interruption: - # If we get here we know the process task was just waiting for - # an interruption (push_interruption_task_frame_and_wait()), so - # we can't cancel the task because it might still need to do - # more things (e.g. pushing a frame after the - # interruption). Instead we just drain the queue because this is - # an interruption. - self.__reset_process_task() - elif isinstance(self.__process_current_frame, UninterruptibleFrame): + if isinstance(self.__process_current_frame, UninterruptibleFrame): # We don't want to cancel UninterruptibleFrame, so we simply # cleanup the queue. 
self.__reset_process_queue() diff --git a/src/pipecat/processors/frameworks/rtvi.py b/src/pipecat/processors/frameworks/rtvi.py index e01e95714..eb1e79f3e 100644 --- a/src/pipecat/processors/frameworks/rtvi.py +++ b/src/pipecat/processors/frameworks/rtvi.py @@ -1702,7 +1702,7 @@ class RTVIProcessor(FrameProcessor): async def interrupt_bot(self): """Send a bot interruption frame upstream.""" - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def send_server_message(self, data: Any): """Send a server message to the client.""" diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index d509b267e..984906c6c 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -675,7 +675,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): self._user_is_speaking = True await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.start_metrics() await self._call_event_handler("on_start_of_turn", transcript) if transcript: diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 497d6aae1..8eb246cf2 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -471,7 +471,7 @@ class DeepgramSTTService(STTService): await self._call_event_handler("on_speech_started", *args, **kwargs) await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _on_utterance_end(self, *args, **kwargs): await self._call_event_handler("on_utterance_end", *args, **kwargs) diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 045a56613..bba554b4a 100644 --- a/src/pipecat/services/gladia/stt.py +++ 
b/src/pipecat/services/gladia/stt.py @@ -613,7 +613,7 @@ class GladiaSTTService(WebsocketSTTService): await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _on_speech_ended(self): """Handle speech end event from Gladia. diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index d06f941c7..2ed11c739 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -1265,7 +1265,7 @@ class GeminiLiveLLMService(LLMService): # combination with the context aggregator default # turn strategies. logger.debug("Gemini VAD: interrupted signal received") - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif message.server_content and message.server_content.model_turn: await self._handle_msg_model_turn(message) elif ( diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index 6d148f6d7..7a4e73806 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -734,7 +734,7 @@ class GrokRealtimeLLMService(LLMService): """Handle speech started event from VAD.""" await self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): """Handle speech stopped event from VAD.""" diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index a6667c7c8..07b6aa82b 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -839,7 +839,7 @@ class OpenAIRealtimeLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await 
self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): await self.start_ttfb_metrics() diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 9a52be114..32895f8b5 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -639,7 +639,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): logger.debug("Server VAD: speech started") await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.start_processing_metrics() async def _handle_speech_stopped(self, evt: dict): diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 8614713ff..c912ed45c 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -709,7 +709,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): await self.start_ttfb_metrics() diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 9e245aece..e368ceb02 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -644,7 +644,7 @@ class SarvamSTTService(STTService): logger.debug("User started speaking") await self._call_event_handler("on_speech_started") await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif 
signal == "END_SPEECH": logger.debug("User stopped speaking") diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index ac18a36e3..bdeb3b249 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -836,7 +836,7 @@ class SpeechmaticsSTTService(STTService): # await self.start_processing_metrics() await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_end_of_turn(self, message: dict[str, Any]) -> None: """Handle EndOfTurn events. diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 49c28149a..1da672ab7 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -558,7 +558,7 @@ class BaseInputTransport(FrameProcessor): # Make sure we notify about interruptions quickly out-of-band. if should_push_immediate_interruption and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif self.interruption_strategies and self._bot_speaking: logger.debug( "User started speaking while bot is speaking with interruption config - " diff --git a/src/pipecat/turns/user_turn_processor.py b/src/pipecat/turns/user_turn_processor.py index 7f8995202..85bc658dd 100644 --- a/src/pipecat/turns/user_turn_processor.py +++ b/src/pipecat/turns/user_turn_processor.py @@ -182,7 +182,7 @@ class UserTurnProcessor(FrameProcessor): await self._user_idle_controller.process_frame(UserStartedSpeakingFrame()) if params.enable_interruptions and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._call_event_handler("on_user_turn_started", strategy) diff --git a/tests/test_context_aggregators.py b/tests/test_context_aggregators.py index 24dae0b4c..37d36bfef 100644 --- 
a/tests/test_context_aggregators.py +++ b/tests/test_context_aggregators.py @@ -21,7 +21,6 @@ from pipecat.frames.frames import ( FunctionCallResultProperties, InterimTranscriptionFrame, InterruptionFrame, - InterruptionTaskFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMFullResponseEndFrame, @@ -567,7 +566,7 @@ class BaseTestUserContextAggregator: SleepFrame(), UserStoppedSpeakingFrame(), ] - expected_up_frames = [InterruptionTaskFrame] + expected_up_frames = [InterruptionFrame] expected_down_frames = [ BotStartedSpeakingFrame, UserStartedSpeakingFrame, diff --git a/tests/test_frame_processor.py b/tests/test_frame_processor.py index 138c8e6d8..a875741e3 100644 --- a/tests/test_frame_processor.py +++ b/tests/test_frame_processor.py @@ -9,8 +9,6 @@ import unittest from dataclasses import dataclass, field from typing import List -from loguru import logger - from pipecat.frames.frames import ( DataFrame, EndFrame, @@ -85,50 +83,38 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): assert before_push_called assert after_push_called - async def test_interruption_and_wait(self): - class DelayFrameProcessor(FrameProcessor): - """This processors just gives time to the event loop to change - between tasks. 
Otherwise things happen to fast.""" - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - await asyncio.sleep(0.1) - await self.push_frame(frame, direction) + async def test_broadcast_interruption(self): + """Test that broadcast_interruption() pushes InterruptionFrame both + directions and allows subsequent code to run.""" class InterruptFrameProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.push_frame(OutputTransportMessageUrgentFrame(message=frame.text)) else: await self.push_frame(frame, direction) - pipeline = Pipeline([DelayFrameProcessor(), InterruptFrameProcessor()]) + pipeline = Pipeline([InterruptFrameProcessor()]) frames_to_send = [ - # Just a random interruption to make sure we don't clear anything - # before the actual `InterruptionTaskFrame` interruption. - InterruptionFrame(), - # This will generate an `InterruptionTaskFrame` and will wait for an - # `InterruptionFrame`. TextFrame(text="Hello from Pipecat!"), - # Just give time for everything to complete. 
SleepFrame(sleep=0.5), - EndFrame(), ] expected_down_frames = [ - InterruptionFrame, InterruptionFrame, OutputTransportMessageUrgentFrame, - EndFrame, + ] + expected_up_frames = [ + InterruptionFrame, ] await run_test( pipeline, frames_to_send=frames_to_send, expected_down_frames=expected_down_frames, - send_end_frame=False, + expected_up_frames=expected_up_frames, ) async def test_interruptible_frames(self): @@ -454,33 +440,20 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): stop_frames = [f for f in received_frames if isinstance(f, StopFrame)] self.assertEqual(len(stop_frames), 1, "StopFrame should survive interruption") - async def test_interruption_frame_complete_sets_event(self): - """Test that InterruptionFrame.complete() sets the event.""" - event = asyncio.Event() - frame = InterruptionFrame(event=event) - self.assertFalse(event.is_set()) - frame.complete() - self.assertTrue(event.is_set()) - - async def test_interruption_frame_complete_without_event(self): - """Test that InterruptionFrame.complete() is safe without an event.""" - frame = InterruptionFrame() - frame.complete() # Should not raise - - async def test_interruption_event_set_at_pipeline_sink(self): - """Test that the event from push_interruption_task_frame_and_wait() - is set when the InterruptionFrame reaches the pipeline sink.""" - event_was_set = False + async def test_broadcast_interruption_allows_subsequent_code(self): + """Test that broadcast_interruption() returns immediately, allowing the + caller to run code afterwards (e.g. 
push an urgent frame).""" + code_after_ran = False class InterruptOnTextProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: FrameDirection): - nonlocal event_was_set + nonlocal code_after_ran await super().process_frame(frame, direction) if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() - event_was_set = True + code_after_ran = True await self.push_frame(OutputTransportMessageUrgentFrame(message="done")) else: await self.push_frame(frame, direction) @@ -499,63 +472,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): frames_to_send=frames_to_send, expected_down_frames=expected_down_frames, ) - self.assertTrue(event_was_set, "Event should be set after InterruptionFrame completes") - - async def test_interruption_completion_timeout_warning(self): - """Test that a warning is logged when an InterruptionFrame is blocked - and never reaches the pipeline sink.""" - warnings = [] - handler_id = logger.add( - lambda msg: warnings.append(str(msg)), level="WARNING", format="{message}" - ) - - try: - - class BlockInterruptionProcessor(FrameProcessor): - """Blocks InterruptionFrames, completing them after a delay.""" - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - if isinstance(frame, InterruptionFrame): - # Complete after the timeout so the warning fires - # but the test doesn't hang. 
- async def delayed_complete(): - await asyncio.sleep(1.0) - frame.complete() - - asyncio.create_task(delayed_complete()) - return - await self.push_frame(frame, direction) - - class InterruptOnTextProcessor(FrameProcessor): - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait(timeout=0.5) - await self.push_frame(OutputTransportMessageUrgentFrame(message="done")) - else: - await self.push_frame(frame, direction) - - pipeline = Pipeline([BlockInterruptionProcessor(), InterruptOnTextProcessor()]) - - frames_to_send = [ - TextFrame(text="trigger"), - ] - expected_down_frames = [ - OutputTransportMessageUrgentFrame, - ] - await run_test( - pipeline, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - finally: - logger.remove(handler_id) - - self.assertTrue( - any("InterruptionFrame has not completed" in w for w in warnings), - "Expected a timeout warning about InterruptionFrame not completing", - ) + self.assertTrue(code_after_ran, "Code after broadcast_interruption() should execute") if __name__ == "__main__": diff --git a/tests/test_stt_mute_filter.py b/tests/test_stt_mute_filter.py index adf4611df..8f55bdecb 100644 --- a/tests/test_stt_mute_filter.py +++ b/tests/test_stt_mute_filter.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD 2-Clause License # -import asyncio import unittest from pipecat.frames.frames import ( @@ -329,17 +328,13 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase): expected_down_frames=expected_returned_frames, ) - async def test_interruption_frame_completed_when_muted(self): - """Test that InterruptionFrame.complete() is called when the frame is - suppressed due to muting, so push_interruption_task_frame_and_wait() - doesn't hang.""" + async def test_interruption_frame_suppressed_when_muted(self): + """Test that InterruptionFrame is suppressed when 
the filter is muted.""" filter = STTMuteFilter(config=STTMuteConfig(strategies={STTMuteStrategy.ALWAYS})) - event = asyncio.Event() - frames_to_send = [ BotStartedSpeakingFrame(), - InterruptionFrame(event=event), + InterruptionFrame(), BotStoppedSpeakingFrame(), ] @@ -354,8 +349,6 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase): expected_down_frames=expected_returned_frames, ) - self.assertTrue(event.is_set(), "InterruptionFrame.complete() should be called when muted") - if __name__ == "__main__": unittest.main() From 741ff14d3a552a2530aaa2c01afe94c7a3b893df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 2 Mar 2026 12:06:08 -0800 Subject: [PATCH 189/189] Rename changelog files to use PR #3896 and mark breaking change --- changelog/{3900.added.md => 3896.added.md} | 0 changelog/3896.changed.md | 1 + changelog/{3900.deprecated.md => 3896.deprecated.md} | 0 changelog/3900.changed.md | 1 - 4 files changed, 1 insertion(+), 1 deletion(-) rename changelog/{3900.added.md => 3896.added.md} (100%) create mode 100644 changelog/3896.changed.md rename changelog/{3900.deprecated.md => 3896.deprecated.md} (100%) delete mode 100644 changelog/3900.changed.md diff --git a/changelog/3900.added.md b/changelog/3896.added.md similarity index 100% rename from changelog/3900.added.md rename to changelog/3896.added.md diff --git a/changelog/3896.changed.md b/changelog/3896.changed.md new file mode 100644 index 000000000..3b7e4f807 --- /dev/null +++ b/changelog/3896.changed.md @@ -0,0 +1 @@ +- ⚠️ Removed `event` field and `complete()` method from `InterruptionFrame`. Removed `event` field from `InterruptionTaskFrame`. These are no longer needed since `broadcast_interruption()` does not require a round-trip completion signal. 
diff --git a/changelog/3900.deprecated.md b/changelog/3896.deprecated.md similarity index 100% rename from changelog/3900.deprecated.md rename to changelog/3896.deprecated.md diff --git a/changelog/3900.changed.md b/changelog/3900.changed.md deleted file mode 100644 index 59b4cdb95..000000000 --- a/changelog/3900.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Removed `event` field and `complete()` method from `InterruptionFrame`. Removed `event` field from `InterruptionTaskFrame`. These are no longer needed since `broadcast_interruption()` does not require a round-trip completion signal.