Add ability to update options

This commit is contained in:
Mark Backman
2025-02-10 16:57:44 -05:00
parent ce0358804b
commit a9c2197dc6
2 changed files with 101 additions and 2 deletions

View File

@@ -57,7 +57,7 @@ pip install "pipecat-ai[option,...]"
| Category | Services | Install Command Example |
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |

View File

@@ -1536,6 +1536,7 @@ class GoogleSTTService(STTService):
await self._connect()
async def set_model(self, model: str):
"""Update the service's recognition model."""
await super().set_model(model)
self._settings["model"] = model
# Recreate stream with new model
@@ -1555,6 +1556,104 @@ class GoogleSTTService(STTService):
await super().cancel(frame)
await self._disconnect()
async def update_options(
    self,
    *,
    language: Optional[Language] = None,
    model: Optional[str] = None,
    enable_automatic_punctuation: Optional[bool] = None,
    enable_spoken_punctuation: Optional[bool] = None,
    enable_spoken_emojis: Optional[bool] = None,
    profanity_filter: Optional[bool] = None,
    enable_word_time_offsets: Optional[bool] = None,
    enable_word_confidence: Optional[bool] = None,
    enable_interim_results: Optional[bool] = None,
    enable_voice_activity_events: Optional[bool] = None,
    location: Optional[str] = None,
) -> None:
    """Update service options dynamically.

    Every argument is keyword-only and optional. An argument left as
    ``None`` keeps its current value; any other value (including ``False``)
    is stored and marks the stream for reconnection.

    Args:
        language: New recognition language. Stored in the settings under
            ``language_code`` after conversion with
            ``language_to_service_language``.
        model: New recognition model. Also forwarded to the parent class,
            exactly as ``set_model`` does.
        enable_automatic_punctuation: Enable/disable automatic punctuation.
        enable_spoken_punctuation: Enable/disable spoken punctuation.
        enable_spoken_emojis: Enable/disable spoken emojis.
        profanity_filter: Enable/disable profanity filter.
        enable_word_time_offsets: Enable/disable word timing info.
        enable_word_confidence: Enable/disable word confidence scores.
        enable_interim_results: Enable/disable interim results.
        enable_voice_activity_events: Enable/disable voice activity detection.
        location: New Google Cloud location (used when building the
            recognizer path).

    Note:
        If at least one option was supplied and a streaming task exists,
        the stream is disconnected and connected again so the new
        configuration is used. Without a streaming task the values are
        only stored.
    """
    # (settings key, wording used in the debug log, requested value).
    # All of these are plain pass-through values written into self._settings.
    flag_updates = (
        ("enable_automatic_punctuation", "automatic punctuation", enable_automatic_punctuation),
        ("enable_spoken_punctuation", "spoken punctuation", enable_spoken_punctuation),
        ("enable_spoken_emojis", "spoken emojis", enable_spoken_emojis),
        ("profanity_filter", "profanity filter", profanity_filter),
        ("enable_word_time_offsets", "word time offsets", enable_word_time_offsets),
        ("enable_word_confidence", "word confidence", enable_word_confidence),
        ("enable_interim_results", "interim results", enable_interim_results),
        ("enable_voice_activity_events", "voice activity events", enable_voice_activity_events),
    )
    needs_reconnect = False

    if language is not None:
        logger.debug(f"Updating language to: {language}")
        self._settings["language_code"] = self.language_to_service_language(language)
        needs_reconnect = True

    if model is not None:
        logger.debug(f"Updating model to: {model}")
        # Keep the parent class informed of the model, the same way
        # set_model() does. We deliberately do not call self.set_model()
        # here: it manages the stream itself, and the single reconnect at
        # the end of this method already covers the change.
        # NOTE(review): presumably the parent uses this for model
        # bookkeeping (e.g. metrics) - confirm against the base class.
        await super().set_model(model)
        self._settings["model"] = model
        needs_reconnect = True

    for settings_key, label, value in flag_updates:
        # `is None` rather than truthiness: False is a legitimate new value.
        if value is None:
            continue
        logger.debug(f"Updating {label} to: {value}")
        self._settings[settings_key] = value
        needs_reconnect = True

    if location is not None:
        logger.debug(f"Updating location to: {location}")
        # Location lives on the instance, not in the settings dict.
        self._location = location
        needs_reconnect = True

    # Reconnect only when something changed and a stream is already running;
    # otherwise the stored values are simply picked up by the next connect.
    if needs_reconnect and self._streaming_task:
        logger.debug("Reconnecting stream due to configuration changes")
        await self._disconnect()
        await self._connect()
async def _connect(self):
"""Initialize streaming recognition config and stream."""
logger.debug("Connecting to Google Speech-to-Text")
@@ -1605,7 +1704,7 @@ class GoogleSTTService(STTService):
async def _request_generator(self):
"""Generates requests for the streaming recognize method."""
recognizer_path = f"projects/{self._project_id}/locations/{self._location}/recognizers/_"
logger.debug(f"Using recognizer path: {recognizer_path}")
logger.trace(f"Using recognizer path: {recognizer_path}")
try:
# First, send the recognition config