From 3b9a8946f9e078d99fbc987065e955de69e7f695 Mon Sep 17 00:00:00 2001
From: Mark Backman
Date: Thu, 10 Apr 2025 08:17:52 -0400
Subject: [PATCH] Update GeminiMultimodalLiveLLMService base_url

---
 CHANGELOG.md                                        |  6 ++++++
 .../services/gemini_multimodal_live/gemini.py       | 13 +++++--------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 416a0bddb..033db7a1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   type was incorrectly handled as a codec retransmission.
 - Avoid initial video delays.
 
+### Changed
+
+- Updated `GeminiMultimodalLiveLLMService`’s default `model` to
+  `models/gemini-2.0-flash-live-001` and `base_url` to the `v1beta` websocket
+  URL.
+
 ### Fixed
 
 - Fixed an issue in the Azure TTS services where the language was being set
diff --git a/src/pipecat/services/gemini_multimodal_live/gemini.py b/src/pipecat/services/gemini_multimodal_live/gemini.py
index 6ee545561..7b8524d91 100644
--- a/src/pipecat/services/gemini_multimodal_live/gemini.py
+++ b/src/pipecat/services/gemini_multimodal_live/gemini.py
@@ -164,7 +164,7 @@ class GeminiMultimodalLiveLLMService(LLMService):
         self,
         *,
         api_key: str,
-        base_url: str = "",
+        base_url: str = "generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent",
         model="models/gemini-2.0-flash-live-001",
         voice_id: str = "Charon",
         start_audio_paused: bool = False,
@@ -179,11 +179,8 @@ class GeminiMultimodalLiveLLMService(LLMService):
     ):
         super().__init__(base_url=base_url, **kwargs)
         self._last_sent_time = 0
-        self.api_key = api_key
-        if base_url:
-            self.base_url = base_url
-        else:
-            self.base_url = "generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
+        self._api_key = api_key
+        self._base_url = base_url
         self.set_model_name(model)
 
         self._voice_id = voice_id
@@ -410,8 +407,8 @@ class GeminiMultimodalLiveLLMService(LLMService):
 
         logger.info("Connecting to Gemini service")
         try:
-            logger.info(f"Connecting to wss://{self.base_url}")
-            uri = f"wss://{self.base_url}?key={self.api_key}"
+            logger.info(f"Connecting to wss://{self._base_url}")
+            uri = f"wss://{self._base_url}?key={self._api_key}"
             self._websocket = await websockets.connect(uri=uri)
             self._receive_task = self.create_task(self._receive_task_handler())
             self._transcribe_audio_task = self.create_task(self._transcribe_audio_handler())