Merge pull request #3941 from pipecat-ai/mb/stt-settings-updates

STT services: settings and examples fixes
This commit is contained in:
Mark Backman
2026-03-06 15:21:30 -05:00
committed by GitHub
38 changed files with 125 additions and 104 deletions

View File

@@ -233,14 +233,14 @@ def can_generate_metrics(self) -> bool:
### Service Settings
Every STT, LLM, TTS, and image-generation service exposes a **Settings dataclass** that serves two roles:
Every AI service (STT, LLM, TTS, image generation, etc.) exposes a **Settings dataclass** that serves two roles:
1. **Store mode** — the service's `self._settings` holds the current value of every runtime-updatable field.
2. **Delta mode** — an update frame carries only the fields that changed; unset fields remain `NOT_GIVEN`.
2. **Delta mode** — an update frame (e.g. `TTSUpdateSettingsFrame`) specifies only the fields that should change; unspecified fields remain `NOT_GIVEN`.
#### Defining your Settings class
Extend `STTSettings`, `TTSSettings`, `LLMSettings`, or `ImageGenSettings`. The base classes already provide common fields (e.g. `model`, `voice`, `language`). You only need to add **service-specific knobs that should be runtime-updatable**:
Extend `STTSettings`, `TTSSettings`, `LLMSettings`, or `ImageGenSettings` — or `ServiceSettings` if your service subclasses `AIService` directly. The base classes already provide common fields (e.g. `model`, `voice`, `language`). You only need to add **service-specific knobs that should be runtime-updatable**:
```python
from dataclasses import dataclass, field
@@ -320,7 +320,7 @@ svc = MyTTSService(
#### Reacting to runtime changes
STT, LLM, and TTS services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`).
AI services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`).
To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like:

View File

@@ -65,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
llm = AzureLLMService(
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
model=os.getenv("AZURE_CHATGPT_MODEL"),
settings=AzureLLMSettings(
model=os.getenv("AZURE_CHATGPT_MODEL"),
system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
),
)

View File

@@ -65,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
llm = AzureLLMService(
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
model=os.getenv("AZURE_CHATGPT_MODEL"),
settings=AzureLLMSettings(
model=os.getenv("AZURE_CHATGPT_MODEL"),
system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
),
)

View File

@@ -63,9 +63,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
llm = AWSBedrockLLMService(
aws_region="us-west-2",
model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
params=AWSBedrockLLMService.InputParams(temperature=0.8),
settings=AWSBedrockLLMSettings(
model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
temperature=0.8,
system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
),
)

View File

@@ -55,8 +55,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
stt = GoogleSTTService(
settings=GoogleSTTSettings(
languages=Language.EN_US,
model="chirp_3",
languages=[Language.EN_US],
# Add model to use a specific model
# model="chirp_3",
),
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
location="us",

View File

@@ -94,7 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
api_key=os.getenv("ASSEMBLYAI_API_KEY"),
vad_force_turn_endpoint=False, # Use AssemblyAI's built-in turn detection
settings=AssemblyAISTTSettings(
speech_model="u3-rt-pro",
model="u3-rt-pro",
# Optional: Tune turn detection timing (defaults shown below)
# min_turn_silence=100, # Default
# max_turn_silence=1000, # Default

View File

@@ -51,13 +51,13 @@ transport_params = {
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = (
SonioxSTTService(
api_key=os.getenv("SONIOX_API_KEY"),
settings=SonioxSTTSettings(
language_hints=[Language.EN],
language_hints_strict=True,
),
stt = SonioxSTTService(
api_key=os.getenv("SONIOX_API_KEY"),
settings=SonioxSTTSettings(
# Add language hints to use a specific language
# Add strict mode to enforce the language hints
language_hints=[Language.EN],
language_hints_strict=True,
),
)

View File

@@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
llm = AWSBedrockLLMService(
aws_region="us-west-2",
settings=AWSBedrockLLMSettings(
model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
model="us.anthropic.claude-sonnet-4-6",
# Note: usually, prefer providing the latency="optimized" param.
# It was omitted here because AWS Bedrock doesn't support it for Claude 3.7,
# the model originally required for image input.
# TODO: confirm whether it is supported for the model configured above.
@@ -170,7 +170,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
context.add_message(
{
"role": "user",
"content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
"content": f"Please introduce yourself to the user briefly; don't mention the camera. Use '{client_id}' as the user ID during function calls.",
}
)
await task.queue_frames([LLMRunFrame()])

View File

@@ -99,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
context.add_message({"user": "system", "content": "Please introduce yourself to the user."})
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
await asyncio.sleep(10)

View File

@@ -255,7 +255,7 @@ class LLMContext:
this method, which is part of the public API of OpenAILLMContext but
doesn't need to be for LLMContext.
.. deprecated::
.. deprecated:: 0.0.92
Use `get_messages()` instead.
Returns:

View File

@@ -27,7 +27,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class UserIdleProcessor(FrameProcessor):
"""Monitors user inactivity and triggers callbacks after timeout periods.
.. deprecated::
.. deprecated:: 0.0.100
UserIdleProcessor is deprecated in 0.0.100 and will be removed in a future version.
Use LLMUserAggregator with user_idle_timeout parameter instead.

View File

@@ -170,7 +170,7 @@ class AnthropicLLMService(LLMService):
class InputParams(BaseModel):
"""Input parameters for Anthropic model inference.
.. deprecated::
.. deprecated:: 0.0.105
Use ``AnthropicLLMSettings`` instead. Pass settings directly via the
``settings`` parameter of :class:`AnthropicLLMService`.
@@ -231,12 +231,12 @@ class AnthropicLLMService(LLMService):
api_key: Anthropic API key for authentication.
model: Model name to use.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AnthropicLLMSettings(model=...)`` instead.
params: Optional model parameters for inference.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AnthropicLLMSettings(...)`` instead.
settings: Runtime-updatable settings for this service. When both

View File

@@ -81,7 +81,7 @@ def map_language_from_assemblyai(language_code: str) -> Language:
@dataclass
class AssemblyAISTTSettings(STTSettings):
"""Settings for the AssemblyAI STT service.
"""Settings for AssemblyAISTTService.
Parameters:
formatted_finals: Whether to enable transcript formatting.
@@ -99,6 +99,8 @@ class AssemblyAISTTSettings(STTSettings):
language_detection: Enable automatic language detection.
format_turns: Whether to format transcript turns.
speaker_labels: Enable speaker diarization.
vad_threshold: VAD confidence threshold (0.0-1.0) for classifying
audio frames as silence. Only applicable to u3-rt-pro.
"""
formatted_finals: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -115,6 +117,7 @@ class AssemblyAISTTSettings(STTSettings):
language_detection: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
format_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
speaker_labels: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
vad_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
class AssemblyAISTTService(WebsocketSTTService):
@@ -199,6 +202,7 @@ class AssemblyAISTTService(WebsocketSTTService):
language_detection=None,
format_turns=True,
speaker_labels=None,
vad_threshold=None,
)
# 2. Apply direct init arg overrides (deprecated)
@@ -227,6 +231,7 @@ class AssemblyAISTTService(WebsocketSTTService):
default_settings.language_detection = connection_params.language_detection
default_settings.format_turns = connection_params.format_turns
default_settings.speaker_labels = connection_params.speaker_labels
default_settings.vad_threshold = connection_params.vad_threshold
# 4. Apply settings delta (canonical API, always wins)
if settings is not None:
@@ -463,6 +468,7 @@ class AssemblyAISTTService(WebsocketSTTService):
"language_detection": s.language_detection,
"format_turns": s.format_turns,
"speaker_labels": s.speaker_labels,
"vad_threshold": s.vad_threshold,
}
for k, v in optional_fields.items():
@@ -651,7 +657,7 @@ class AssemblyAISTTService(WebsocketSTTService):
await self.start_processing_metrics()
await self.broadcast_frame(UserStartedSpeakingFrame)
if self._should_interrupt:
await self.push_interruption_task_frame_and_wait()
await self.broadcast_interruption()
self._user_speaking = True
async def _handle_termination(self, message: TerminationMessage):

View File

@@ -754,7 +754,7 @@ class AWSBedrockLLMService(LLMService):
class InputParams(BaseModel):
"""Input parameters for AWS Bedrock LLM service.
.. deprecated::
.. deprecated:: 0.0.105
Use ``AWSBedrockLLMSettings`` instead. Pass settings directly via the
``settings`` parameter of :class:`AWSBedrockLLMService`.
@@ -795,7 +795,7 @@ class AWSBedrockLLMService(LLMService):
Args:
model: The AWS Bedrock model identifier to use.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AWSBedrockLLMSettings(model=...)`` instead.
aws_access_key: AWS access key ID. If None, uses default credentials.
@@ -804,7 +804,7 @@ class AWSBedrockLLMService(LLMService):
aws_region: AWS region for the Bedrock service.
params: Model parameters and configuration.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AWSBedrockLLMSettings(...)`` instead.
settings: Runtime-updatable settings for this service. When both

View File

@@ -280,7 +280,7 @@ class AWSNovaSonicLLMService(LLMService):
- Nova Sonic (the older model): "us-east-1", "ap-northeast-1"
model: Model identifier. Defaults to "amazon.nova-2-sonic-v1:0".
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AWSNovaSonicLLMSettings(model=...)`` instead.
voice_id: Voice ID for speech synthesis.
@@ -289,7 +289,7 @@ class AWSNovaSonicLLMService(LLMService):
- Nova 2 Sonic (the default model): see https://docs.aws.amazon.com/nova/latest/nova2-userguide/sonic-language-support.html
- Nova Sonic (the older model): see https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=AWSNovaSonicLLMSettings(voice=...)`` instead.
params: Model parameters for audio configuration and inference.

View File

@@ -47,7 +47,7 @@ except ModuleNotFoundError as e:
@dataclass
class AWSTranscribeSTTSettings(STTSettings):
"""Settings for the AWS Transcribe STT service."""
"""Settings for AWSTranscribeSTTService."""
pass
@@ -99,13 +99,13 @@ class AWSTranscribeSTTService(WebsocketSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = AWSTranscribeSTTSettings(
model=None,
language=self.language_to_service_language(Language.EN) or "en-US",
language=self.language_to_service_language(Language.EN),
)
# 2. Apply direct init arg overrides (deprecated)
if language is not None:
_warn_deprecated_param("language", AWSTranscribeSTTSettings, "language")
default_settings.language = self.language_to_service_language(language) or "en-US"
default_settings.language = self.language_to_service_language(language)
# 3. No params to apply

View File

@@ -53,7 +53,7 @@ except ModuleNotFoundError as e:
@dataclass
class AzureSTTSettings(STTSettings):
"""Settings for the Azure STT service."""
"""Settings for AzureSTTService."""
pass

View File

@@ -46,7 +46,7 @@ except ModuleNotFoundError as e:
@dataclass
class CartesiaSTTSettings(STTSettings):
"""Settings for the Cartesia STT service."""
"""Settings for CartesiaSTTService."""
pass

View File

@@ -71,7 +71,7 @@ class FluxEventType(str, Enum):
@dataclass
class DeepgramFluxSTTSettings(STTSettings):
"""Settings for the Deepgram Flux STT service.
"""Settings for DeepgramFluxSTTService.
Parameters:
eager_eot_threshold: EagerEndOfTurn/TurnResumed threshold. Off by default.
@@ -81,7 +81,6 @@ class DeepgramFluxSTTSettings(STTSettings):
eot_timeout_ms: Time in ms after speech to finish a turn regardless of EOT
confidence (default 5000).
keyterm: Keyterms to boost recognition accuracy for specialized terminology.
tag: Tags to label requests for identification during usage reporting.
min_confidence: Minimum confidence required to create a TranscriptionFrame.
"""
@@ -89,7 +88,6 @@ class DeepgramFluxSTTSettings(STTSettings):
eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
eot_timeout_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
keyterm: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
tag: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
min_confidence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -157,6 +155,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
mip_opt_out: Optional[bool] = None,
model: Optional[str] = None,
flux_encoding: str = "linear16",
tag: Optional[list] = None,
params: Optional[InputParams] = None,
should_interrupt: bool = True,
settings: Optional[DeepgramFluxSTTSettings] = None,
@@ -177,6 +176,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
flux_encoding: Audio encoding format required by Flux API. Must be "linear16".
Raw signed little-endian 16-bit PCM encoding.
tag: Tags to label requests for identification during usage reporting.
params: InputParams instance containing detailed API configuration options.
.. deprecated:: 0.0.105
@@ -224,7 +224,6 @@ class DeepgramFluxSTTService(WebsocketSTTService):
eot_threshold=None,
eot_timeout_ms=None,
keyterm=[],
tag=[],
min_confidence=None,
)
@@ -241,7 +240,8 @@ class DeepgramFluxSTTService(WebsocketSTTService):
default_settings.eot_threshold = params.eot_threshold
default_settings.eot_timeout_ms = params.eot_timeout_ms
default_settings.keyterm = params.keyterm or []
default_settings.tag = params.tag or []
if params.tag and tag is None:
tag = params.tag
default_settings.min_confidence = params.min_confidence
if params.mip_opt_out is not None:
mip_opt_out = params.mip_opt_out
@@ -261,6 +261,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
self._should_interrupt = should_interrupt
self._encoding = flux_encoding
self._mip_opt_out = mip_opt_out
self._tag = tag or []
self._websocket_url = None
self._receive_task = None
@@ -469,7 +470,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
url_params.append(urlencode({"keyterm": keyterm}))
# Add tag parameters (can have multiple)
for tag_value in self._settings.tag:
for tag_value in self._tag:
url_params.append(urlencode({"tag": tag_value}))
self._websocket_url = f"{self._url}?{'&'.join(url_params)}"

View File

@@ -177,7 +177,7 @@ class LiveOptions:
@dataclass
class DeepgramSTTSettings(STTSettings):
"""Settings for Deepgram STT services.
"""Settings for DeepgramSTTService.
``model`` and ``language`` are inherited from ``STTSettings`` /
``ServiceSettings``. Additional Deepgram connection params may

View File

@@ -179,19 +179,19 @@ class CommitStrategy(str, Enum):
@dataclass
class ElevenLabsSTTSettings(STTSettings):
"""Settings for the ElevenLabs file-based STT service.
"""Settings for ElevenLabsSTTService.
Parameters:
tag_audio_events: Whether to include audio events like (laughter),
(coughing) in the transcription.
"""
tag_audio_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
tag_audio_events: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@dataclass
class ElevenLabsRealtimeSTTSettings(STTSettings):
"""Settings for the ElevenLabs Realtime STT service.
"""Settings for ElevenLabsRealtimeSTTService.
See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions.
@@ -277,8 +277,8 @@ class ElevenLabsSTTService(SegmentedSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = ElevenLabsSTTSettings(
model="scribe_v2",
language="eng",
tag_audio_events=True,
language=language_to_elevenlabs_language(Language.EN),
tag_audio_events=None,
)
# 2. Apply direct init arg overrides (deprecated)
@@ -291,9 +291,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
_warn_deprecated_param("params", ElevenLabsSTTSettings)
if not settings:
if params.language is not None:
default_settings.language = (
self.language_to_service_language(params.language) or "eng"
)
default_settings.language = language_to_elevenlabs_language(params.language)
default_settings.tag_audio_events = params.tag_audio_events
# 4. Apply settings delta (canonical API, always wins)
@@ -354,10 +352,11 @@ class ElevenLabsSTTService(SegmentedSTTService):
content_type="audio/x-wav",
)
# Add required model_id, language_code, and tag_audio_events
# Add required model_id and language_code
data.add_field("model_id", self._settings.model)
data.add_field("language_code", self._settings.language)
data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower())
if self._settings.tag_audio_events is not None:
data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower())
async with self._session.post(url, data=data, headers=headers) as response:
if response.status != 200:

View File

@@ -143,7 +143,7 @@ def language_to_fal_language(language: Language) -> Optional[str]:
@dataclass
class FalSTTSettings(STTSettings):
"""Settings for the Fal Wizper STT service."""
"""Settings for FalSTTService."""
pass
@@ -215,7 +215,7 @@ class FalSTTService(SegmentedSTTService):
# 1. Initialize default_settings with hardcoded defaults
default_settings = FalSTTSettings(
model=None,
language=language_to_fal_language(Language.EN) or "en",
language=language_to_fal_language(Language.EN),
)
# 2. (no deprecated direct args for this service)
@@ -224,9 +224,8 @@ class FalSTTService(SegmentedSTTService):
if params is not None:
_warn_deprecated_param("params", FalSTTSettings)
if not settings:
default_settings.language = (
language_to_fal_language(params.language) if params.language else "en"
)
if params.language is not None:
default_settings.language = language_to_fal_language(params.language)
if params.task != "transcribe":
task = params.task
if params.chunk_level != "segment":

View File

@@ -188,7 +188,7 @@ class _InputParamsDescriptor:
@dataclass
class GladiaSTTSettings(STTSettings):
"""Settings for Gladia STT service.
"""Settings for GladiaSTTService.
Parameters:
language_config: Language detection and handling configuration.

View File

@@ -552,7 +552,7 @@ class ContextWindowCompressionParams(BaseModel):
class InputParams(BaseModel):
"""Input parameters for Gemini Live generation.
.. deprecated::
.. deprecated:: 0.0.105
Use ``GeminiLiveLLMSettings`` instead.
Parameters:
@@ -678,7 +678,7 @@ class GeminiLiveLLMService(LLMService):
model: Model identifier to use.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GeminiLiveLLMSettings(model=...)`` instead.
voice_id: TTS voice identifier. Defaults to "Charon".
@@ -691,7 +691,7 @@ class GeminiLiveLLMService(LLMService):
tools: Tools/functions available to the model. Defaults to None.
params: Configuration parameters for the model.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GeminiLiveLLMSettings(...)`` instead.
settings: Gemini Live LLM settings. If provided together with deprecated

View File

@@ -88,7 +88,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
project_id: Google Cloud project ID.
model: Model identifier to use.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GeminiLiveLLMSettings(model=...)`` instead.
voice_id: TTS voice identifier. Defaults to "Charon".
@@ -102,7 +102,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
params: Configuration parameters for the model along with Vertex AI
location and project ID.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GeminiLiveLLMSettings(...)`` instead.
settings: Gemini Live LLM settings. If provided together with deprecated

View File

@@ -754,7 +754,7 @@ class GoogleLLMService(LLMService):
class InputParams(BaseModel):
"""Input parameters for Google AI models.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GoogleLLMSettings(...)`` instead.
Parameters:
@@ -797,12 +797,12 @@ class GoogleLLMService(LLMService):
api_key: Google AI API key for authentication.
model: Model name to use.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GoogleLLMSettings(model=...)`` instead.
params: Optional model parameters for inference.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GoogleLLMSettings(...)`` instead.
settings: Runtime-updatable settings for this service. When both

View File

@@ -128,14 +128,14 @@ class GoogleVertexLLMService(GoogleLLMService):
credentials_path: Path to the service account JSON file.
model: Model identifier (e.g., "gemini-2.5-flash").
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GoogleLLMSettings(model=...)`` instead.
location: GCP region for Vertex AI endpoint (e.g., "us-east4").
project_id: Google Cloud project ID.
params: Input parameters for the model.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=GoogleLLMSettings(...)`` instead.
settings: Runtime-updatable settings for this service. When both

View File

@@ -360,7 +360,7 @@ def language_to_google_stt_language(language: Language) -> Optional[str]:
@dataclass
class GoogleSTTSettings(STTSettings):
"""Settings for Google Cloud Speech-to-Text V2.
"""Settings for GoogleSTTService.
Parameters:
languages: List of ``Language`` enums for recognition
@@ -653,7 +653,7 @@ class GoogleSTTService(STTService):
async def set_languages(self, languages: List[Language]):
"""Update the service's recognition languages.
.. deprecated::
.. deprecated:: 0.0.104
Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(languages=...)``
instead.

View File

@@ -482,7 +482,7 @@ def language_to_gemini_tts_language(language: Language) -> Optional[str]:
@dataclass
class GoogleHttpTTSSettings(TTSSettings):
"""Settings for Google HTTP TTS service.
"""Settings for GoogleHttpTTSService.
Parameters:
pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
@@ -512,8 +512,8 @@ class GoogleHttpTTSSettings(TTSSettings):
@dataclass
class GoogleStreamTTSSettings(TTSSettings):
"""Settings for Google streaming TTS service.
class GoogleTTSSettings(TTSSettings):
"""Settings for GoogleTTSService.
Parameters:
speaking_rate: The speaking rate, in the range [0.25, 2.0].
@@ -522,9 +522,14 @@ class GoogleStreamTTSSettings(TTSSettings):
speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
#: .. deprecated:: 0.0.105
#: Use ``GoogleTTSSettings`` instead.
GoogleStreamTTSSettings = GoogleTTSSettings
@dataclass
class GeminiTTSSettings(TTSSettings):
"""Settings for Gemini TTS service.
"""Settings for GeminiTTSService.
Parameters:
prompt: Optional style instructions for how to synthesize the content.
@@ -619,6 +624,13 @@ class GoogleHttpTTSService(TTSService):
model=None,
voice="en-US-Chirp3-HD-Charon",
language="en-US",
pitch=None,
rate=None,
speaking_rate=None,
volume=None,
emphasis=None,
gender=None,
google_style=None,
)
# 2. Apply direct init arg overrides (deprecated)
@@ -1008,13 +1020,13 @@ class GoogleTTSService(GoogleBaseTTSService):
)
"""
_settings: GoogleStreamTTSSettings
_settings: GoogleTTSSettings
class InputParams(BaseModel):
"""Input parameters for Google streaming TTS configuration.
.. deprecated:: 0.0.105
Use ``GoogleStreamTTSSettings`` directly via the ``settings`` parameter instead.
Use ``GoogleTTSSettings`` directly via the ``settings`` parameter instead.
Parameters:
language: Language for synthesis. Defaults to English.
@@ -1034,7 +1046,7 @@ class GoogleTTSService(GoogleBaseTTSService):
voice_cloning_key: Optional[str] = None,
sample_rate: Optional[int] = None,
params: Optional[InputParams] = None,
settings: Optional[GoogleStreamTTSSettings] = None,
settings: Optional[GoogleTTSSettings] = None,
**kwargs,
):
"""Initializes the Google streaming TTS service.
@@ -1046,34 +1058,35 @@ class GoogleTTSService(GoogleBaseTTSService):
voice_id: Google TTS voice identifier (e.g., "en-US-Chirp3-HD-Charon").
.. deprecated:: 0.0.105
Use ``settings=GoogleStreamTTSSettings(voice=...)`` instead.
Use ``settings=GoogleTTSSettings(voice=...)`` instead.
voice_cloning_key: The voice cloning key for Chirp 3 custom voices.
sample_rate: Audio sample rate in Hz. If None, uses default.
params: Language configuration parameters.
.. deprecated:: 0.0.105
Use ``settings=GoogleStreamTTSSettings(...)`` instead.
Use ``settings=GoogleTTSSettings(...)`` instead.
settings: Runtime-updatable settings. When provided alongside deprecated
parameters, ``settings`` values take precedence.
**kwargs: Additional arguments passed to parent TTSService.
"""
# 1. Initialize default_settings with hardcoded defaults
default_settings = GoogleStreamTTSSettings(
default_settings = GoogleTTSSettings(
model=None,
voice="en-US-Chirp3-HD-Charon",
language="en-US",
speaking_rate=None,
)
# 2. Apply direct init arg overrides (deprecated)
if voice_id is not None:
_warn_deprecated_param("voice_id", GoogleStreamTTSSettings, "voice")
_warn_deprecated_param("voice_id", GoogleTTSSettings, "voice")
default_settings.voice = voice_id
# 3. Apply params overrides — only if settings not provided
if params is not None:
_warn_deprecated_param("params", GoogleStreamTTSSettings)
_warn_deprecated_param("params", GoogleTTSSettings)
if not settings:
if params.language is not None:
default_settings.language = self.language_to_service_language(params.language)
@@ -1104,7 +1117,7 @@ class GoogleTTSService(GoogleBaseTTSService):
Args:
delta: Settings delta. Can include 'speaking_rate' (float).
"""
if isinstance(delta, GoogleStreamTTSSettings) and is_given(delta.speaking_rate):
if isinstance(delta, GoogleTTSSettings) and is_given(delta.speaking_rate):
rate_value = float(delta.speaking_rate)
if not (0.25 <= rate_value <= 2.0):
logger.warning(
@@ -1308,6 +1321,9 @@ class GeminiTTSService(GoogleBaseTTSService):
model="gemini-2.5-flash-tts",
voice="Kore",
language="en-US",
prompt=None,
multi_speaker=False,
speaker_configs=None,
)
# 2. Apply direct init arg overrides (deprecated)

View File

@@ -68,7 +68,7 @@ def language_to_gradium_language(language: Language) -> Optional[str]:
@dataclass
class GradiumSTTSettings(STTSettings):
"""Settings for the Gradium STT service."""
"""Settings for GradiumSTTService."""
pass

View File

@@ -93,14 +93,14 @@ def language_to_nvidia_riva_language(language: Language) -> Optional[str]:
@dataclass
class NvidiaSTTSettings(STTSettings):
"""Settings for the NVIDIA Riva streaming STT service."""
"""Settings for NvidiaSTTService."""
pass
@dataclass
class NvidiaSegmentedSTTSettings(STTSettings):
"""Settings for the NVIDIA Riva segmented STT service.
"""Settings for NvidiaSegmentedSTTService.
Parameters:
profanity_filter: Whether to filter profanity from results.

View File

@@ -182,7 +182,7 @@ _OPENAI_SAMPLE_RATE = 24000
@dataclass
class OpenAIRealtimeSTTSettings(STTSettings):
"""Settings for the OpenAI Realtime STT service.
"""Settings for OpenAIRealtimeSTTService.
Parameters:
prompt: Optional prompt text to guide transcription style.

View File

@@ -134,7 +134,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
api_key: OpenAI API key for authentication.
model: OpenAI model name.
.. deprecated::
.. deprecated:: 0.0.105
Use ``settings=OpenAIRealtimeBetaLLMSettings(model=...)`` instead.
base_url: WebSocket base URL for the realtime API.

View File

@@ -139,7 +139,7 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = {
@dataclass
class SarvamSTTSettings(STTSettings):
"""Settings for the Sarvam STT service.
"""Settings for SarvamSTTService.
Parameters:
prompt: Optional prompt to guide transcription/translation style/context.
@@ -414,7 +414,7 @@ class SarvamSTTService(STTService):
async def set_prompt(self, prompt: Optional[str]):
"""Set the transcription/translation prompt and reconnect.
.. deprecated::
.. deprecated:: 0.0.104
Use ``STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...))`` instead.
Args:

View File

@@ -141,7 +141,7 @@ def _prepare_language_hints(
@dataclass
class SonioxSTTSettings(STTSettings):
"""Settings for Soniox STT service.
"""Settings for SonioxSTTService.
Parameters:
language_hints: List of language hints to use for transcription.

View File

@@ -85,12 +85,11 @@ class TurnDetectionMode(str, Enum):
@dataclass
class SpeechmaticsSTTSettings(STTSettings):
"""Settings for Speechmatics STT service.
"""Settings for SpeechmaticsSTTService.
See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field.
Parameters:
model: The operating point / model name.
domain: Domain for Speechmatics API.
turn_detection_mode: Endpoint handling mode.
speaker_active_format: Formatter for active speaker ID.
@@ -490,16 +489,16 @@ class SpeechmaticsSTTService(STTService):
default_settings.prefer_current_speaker = _params.prefer_current_speaker
default_settings.extra_params = _params.extra_params
# Build SDK config from settings, then resolve model from operating_point
# --- 4. Settings delta (canonical API, always wins) ---
if settings is not None:
default_settings.apply_update(settings)
# Build SDK config from settings, set model name before calling super
self._client: VoiceAgentClient | None = None
self._audio_encoding = encoding
self._config: VoiceAgentConfig = self._build_config(default_settings)
default_settings.model = self._config.operating_point.value
# --- 4. Settings delta (canonical API, always wins) ---
if settings is not None:
default_settings.apply_update(settings)
super().__init__(
sample_rate=sample_rate,
ttfs_p99_latency=ttfs_p99_latency,

View File

@@ -28,7 +28,7 @@ from pipecat.utils.tracing.service_decorators import traced_stt
@dataclass
class BaseWhisperSTTSettings(STTSettings):
"""Settings for Whisper API-based STT services.
"""Settings for BaseWhisperSTTService.
Parameters:
prompt: Optional text to guide the model's style or continue

View File

@@ -176,7 +176,7 @@ def language_to_whisper_language(language: Language) -> Optional[str]:
@dataclass
class WhisperSTTSettings(STTSettings):
"""Settings for the local Whisper (Faster Whisper) STT service.
"""Settings for WhisperSTTService.
Parameters:
no_speech_prob: Probability threshold for filtering non-speech segments.
@@ -187,7 +187,7 @@ class WhisperSTTSettings(STTSettings):
@dataclass
class WhisperMLXSTTSettings(STTSettings):
"""Settings for the MLX Whisper STT service.
"""Settings for WhisperMLXSTTService.
Parameters:
no_speech_prob: Probability threshold for filtering non-speech segments.