Add ability to update options
This commit is contained in:
@@ -57,7 +57,7 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
|
||||
@@ -1536,6 +1536,7 @@ class GoogleSTTService(STTService):
|
||||
await self._connect()
|
||||
|
||||
async def set_model(self, model: str):
|
||||
"""Update the service's recognition model."""
|
||||
await super().set_model(model)
|
||||
self._settings["model"] = model
|
||||
# Recreate stream with new model
|
||||
@@ -1555,6 +1556,104 @@ class GoogleSTTService(STTService):
|
||||
await super().cancel(frame)
|
||||
await self._disconnect()
|
||||
|
||||
async def update_options(
    self,
    *,
    language: Optional[Language] = None,
    model: Optional[str] = None,
    enable_automatic_punctuation: Optional[bool] = None,
    enable_spoken_punctuation: Optional[bool] = None,
    enable_spoken_emojis: Optional[bool] = None,
    profanity_filter: Optional[bool] = None,
    enable_word_time_offsets: Optional[bool] = None,
    enable_word_confidence: Optional[bool] = None,
    enable_interim_results: Optional[bool] = None,
    enable_voice_activity_events: Optional[bool] = None,
    location: Optional[str] = None,
) -> None:
    """Update service options dynamically.

    Every argument is keyword-only and optional. An argument left as
    ``None`` keeps the currently stored value untouched.

    Args:
        language: New recognition language.
        model: New recognition model.
        enable_automatic_punctuation: Enable/disable automatic punctuation.
        enable_spoken_punctuation: Enable/disable spoken punctuation.
        enable_spoken_emojis: Enable/disable spoken emojis.
        profanity_filter: Enable/disable profanity filter.
        enable_word_time_offsets: Enable/disable word timing info.
        enable_word_confidence: Enable/disable word confidence scores.
        enable_interim_results: Enable/disable interim results.
        enable_voice_activity_events: Enable/disable voice activity detection.
        location: New Google Cloud location.

    Note:
        The stream is reconnected only when at least one supplied value
        actually differs from the stored one and a streaming task is
        currently active. Passing values that match the current
        configuration is a no-op, so redundant calls do not interrupt
        an in-flight recognition stream.
    """
    needs_reconnect = False

    # The language is stored as a service-specific code, so it has to be
    # converted before it can be compared against the current setting.
    if language is not None:
        language_code = self.language_to_service_language(language)
        if self._settings.get("language_code") != language_code:
            logger.debug(f"Updating language to: {language}")
            self._settings["language_code"] = language_code
            needs_reconnect = True

    # (settings key, label used in the debug log, requested value)
    simple_options = (
        ("model", "model", model),
        ("enable_automatic_punctuation", "automatic punctuation", enable_automatic_punctuation),
        ("enable_spoken_punctuation", "spoken punctuation", enable_spoken_punctuation),
        ("enable_spoken_emojis", "spoken emojis", enable_spoken_emojis),
        ("profanity_filter", "profanity filter", profanity_filter),
        ("enable_word_time_offsets", "word time offsets", enable_word_time_offsets),
        ("enable_word_confidence", "word confidence", enable_word_confidence),
        ("enable_interim_results", "interim results", enable_interim_results),
        ("enable_voice_activity_events", "voice activity events", enable_voice_activity_events),
    )
    for key, label, value in simple_options:
        # Skip options that were not supplied or that already hold the
        # requested value; neither justifies tearing down the stream.
        if value is None or self._settings.get(key) == value:
            continue
        logger.debug(f"Updating {label} to: {value}")
        self._settings[key] = value
        needs_reconnect = True

    # NOTE(review): only self._location is updated here. If the API client
    # was built with a location-specific endpoint, confirm it is recreated
    # elsewhere; client construction is not visible from this method.
    if location is not None and location != self._location:
        logger.debug(f"Updating location to: {location}")
        self._location = location
        needs_reconnect = True

    # Reconnect the stream if necessary
    if needs_reconnect and self._streaming_task:
        logger.debug("Reconnecting stream due to configuration changes")
        await self._disconnect()
        await self._connect()
|
||||
|
||||
async def _connect(self):
|
||||
"""Initialize streaming recognition config and stream."""
|
||||
logger.debug("Connecting to Google Speech-to-Text")
|
||||
@@ -1605,7 +1704,7 @@ class GoogleSTTService(STTService):
|
||||
async def _request_generator(self):
|
||||
"""Generates requests for the streaming recognize method."""
|
||||
recognizer_path = f"projects/{self._project_id}/locations/{self._location}/recognizers/_"
|
||||
logger.debug(f"Using recognizer path: {recognizer_path}")
|
||||
logger.trace(f"Using recognizer path: {recognizer_path}")
|
||||
|
||||
try:
|
||||
# First, send the recognition config
|
||||
|
||||
Reference in New Issue
Block a user