Merge pull request #3228 from pipecat-ai/mb/gemini-live-update

Update GeminiLiveLLMService model to gemini-2.5-flash-native-audio-pr…
2025-12-12 14:32:45 -05:00
parent 22288648e6 6cda357ce8
commit 5f6448a8a4
4 changed files with 11 additions and 10 deletions
--- a/changelog/3228.changed.md
+++ b/changelog/3228.changed.md
@@ -0,0 +1,4 @@
+- Updated default models for:
+
+  - `GeminiLiveLLMService` to `gemini-2.5-flash-native-audio-preview-12-2025`.
+  - `GeminiLiveVertexLLMService` to `gemini-live-2.5-flash-native-audio`.
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
@@ -563,18 +563,18 @@ class InputParams(BaseModel):
        context_window_compression: Context compression settings. Defaults to None.
        thinking: Thinking settings. Defaults to None.
            Note that these settings may require specifying a model that
-            supports them, e.g. "gemini-2.5-flash-native-audio-preview-09-2025".
+            supports them, e.g. "gemini-2.5-flash-native-audio-preview-12-2025".
        enable_affective_dialog: Enable affective dialog, which allows Gemini
            to adapt to expression and tone. Defaults to None.
            Note that these settings may require specifying a model that
-            supports them, e.g. "gemini-2.5-flash-native-audio-preview-09-2025".
+            supports them, e.g. "gemini-2.5-flash-native-audio-preview-12-2025".
            Also note that this setting may require specifying an API version that
            supports it, e.g. HttpOptions(api_version="v1alpha").
        proactivity: Proactivity settings, which allows Gemini to proactively
            decide how to behave, such as whether to avoid responding to
            content that is not relevant. Defaults to None.
            Note that these settings may require specifying a model that
-            supports them, e.g. "gemini-2.5-flash-native-audio-preview-09-2025".
+            supports them, e.g. "gemini-2.5-flash-native-audio-preview-12-2025".
            Also note that this setting may require specifying an API version that
            supports it, e.g. HttpOptions(api_version="v1alpha").
        extra: Additional parameters. Defaults to empty dict.
@@ -615,7 +615,7 @@ class GeminiLiveLLMService(LLMService):
        *,
        api_key: str,
        base_url: Optional[str] = None,
-        model="models/gemini-2.0-flash-live-001",
+        model="models/gemini-2.5-flash-native-audio-preview-12-2025",
        voice_id: str = "Charon",
        start_audio_paused: bool = False,
        start_video_paused: bool = False,
@@ -638,7 +638,7 @@ class GeminiLiveLLMService(LLMService):
                    Please use `http_options` to customize requests made by the
                    API client.

-            model: Model identifier to use. Defaults to "models/gemini-2.0-flash-live-001".
+            model: Model identifier to use. Defaults to "models/gemini-2.5-flash-native-audio-preview-12-2025".
            voice_id: TTS voice identifier. Defaults to "Charon".
            start_audio_paused: Whether to start with audio input paused. Defaults to False.
            start_video_paused: Whether to start with video input paused. Defaults to False.
--- a/src/pipecat/services/google/gemini_live/llm_vertex.py
+++ b/src/pipecat/services/google/gemini_live/llm_vertex.py
@@ -51,7 +51,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
        credentials_path: Optional[str] = None,
        location: str,
        project_id: str,
-        model="google/gemini-2.0-flash-live-preview-04-09",
+        model="google/gemini-live-2.5-flash-native-audio",
        voice_id: str = "Charon",
        start_audio_paused: bool = False,
        start_video_paused: bool = False,
@@ -70,7 +70,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
            credentials_path: Path to the service account JSON file.
            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
            project_id: Google Cloud project ID.
-            model: Model identifier to use. Defaults to "models/gemini-2.0-flash-live-preview-04-09".
+            model: Model identifier to use. Defaults to "google/gemini-live-2.5-flash-native-audio".
            voice_id: TTS voice identifier. Defaults to "Charon".
            start_audio_paused: Whether to start with audio input paused. Defaults to False.
            start_video_paused: Whether to start with video input paused. Defaults to False.
--- a/tests/test_transcript_processor.py
+++ b/tests/test_transcript_processor.py
@@ -730,9 +730,6 @@ class TestThoughtTranscription(unittest.IsolatedAsyncioTestCase):
        self.assertIsInstance(second_message, ThoughtTranscriptionMessage)
        self.assertEqual(second_message.content, "Second consideration")

-        # Verify timestamps are different
-        self.assertNotEqual(first_message.timestamp, second_message.timestamp)
-
    async def test_empty_thought_handling(self):
        """Test that empty thoughts are not emitted"""
        processor = AssistantTranscriptProcessor(process_thoughts=True)