Merge pull request #1149 from pipecat-ai/mb/update-google-default-llm-model

Use gemini-2.0-flash-001 as the default model for GoogleLLMService
2025-02-06 12:41:13 -05:00
parent 0efa2711ff 4904f52cee
commit 428e763814
8 changed files with 14 additions and 24 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  and should be set manually from the serializer constructor if a different
  value is needed.

+### Changed
+
+- Use `gemini-2.0-flash-001` as the default model for `GoogleLLMService`.
+
 ## [0.0.55] - 2025-02-05

 ### Added
--- a/examples/foundational/07s-interruptible-google-audio-in.py
+++ b/examples/foundational/07s-interruptible-google-audio-in.py
@@ -216,11 +216,7 @@ async def main():
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

-        llm = GoogleLLMService(
-            model="gemini-1.5-flash-latest",
-            # model="gemini-exp-1114",
-            api_key=os.getenv("GOOGLE_API_KEY"),
-        )
+        llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")

        messages = [
            {
--- a/examples/foundational/12a-describe-video-gemini-flash.py
+++ b/examples/foundational/12a-describe-video-gemini-flash.py
@@ -72,9 +72,7 @@ async def main():

        vision_aggregator = VisionImageFrameAggregator()

-        google = GoogleLLMService(
-            model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")
-        )
+        google = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY"))

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
--- a/examples/foundational/14e-function-calling-gemini.py
+++ b/examples/foundational/14e-function-calling-gemini.py
@@ -62,11 +62,7 @@ async def main():
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

-        llm = GoogleLLMService(
-            model="gemini-1.5-flash-latest",
-            # model="gemini-exp-1114",
-            api_key=os.getenv("GOOGLE_API_KEY"),
-        )
+        llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
        llm.register_function("get_weather", get_weather)
        llm.register_function("get_image", get_image)

--- a/examples/foundational/20d-persistent-context-gemini.py
+++ b/examples/foundational/20d-persistent-context-gemini.py
@@ -237,7 +237,7 @@ async def main():
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

-        llm = GoogleLLMService(model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY"))
+        llm = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY"))

        # you can either register a single function for all function calls, or specific functions
        # llm.register_function(None, fetch_weather_from_api)
--- a/examples/foundational/22d-natural-conversation-gemini-audio.py
+++ b/examples/foundational/22d-natural-conversation-gemini-audio.py
@@ -55,13 +55,9 @@ load_dotenv(override=True)
 logger.remove(0)
 logger.add(sys.stderr, level="DEBUG")

-# TRANSCRIBER_MODEL = "gemini-1.5-flash-latest"
-# CLASSIFIER_MODEL = "gemini-1.5-flash-latest"
-# CONVERSATION_MODEL = "gemini-1.5-flash-latest"
-
-TRANSCRIBER_MODEL = "gemini-2.0-flash-exp"
-CLASSIFIER_MODEL = "gemini-2.0-flash-exp"
-CONVERSATION_MODEL = "gemini-2.0-flash-exp"
+TRANSCRIBER_MODEL = "gemini-2.0-flash-001"
+CLASSIFIER_MODEL = "gemini-2.0-flash-001"
+CONVERSATION_MODEL = "gemini-2.0-flash-001"

 transcriber_system_instruction = """You are an audio transcriber. You are receiving audio from a user. Your job is to
 transcribe the input audio to text exactly as it was said by the user.
--- a/examples/foundational/25-google-audio-in.py
+++ b/examples/foundational/25-google-audio-in.py
@@ -292,7 +292,7 @@ async def main():

        conversation_llm = GoogleLLMService(
            name="Conversation",
-            model="gemini-1.5-flash-latest",
+            model="gemini-2.0-flash-001",
            # model="gemini-exp-1121",
            api_key=os.getenv("GOOGLE_API_KEY"),
            # we can give the GoogleLLMService a system instruction to use directly
@@ -303,7 +303,7 @@ async def main():

        input_transcription_llm = GoogleLLMService(
            name="Transcription",
-            model="gemini-1.5-flash-latest",
+            model="gemini-2.0-flash-001",
            # model="gemini-exp-1121",
            api_key=os.getenv("GOOGLE_API_KEY"),
            system_instruction=transcriber_system_message,
--- a/src/pipecat/services/google/google.py
+++ b/src/pipecat/services/google/google.py
@@ -639,7 +639,7 @@ class GoogleLLMService(LLMService):
        self,
        *,
        api_key: str,
-        model: str = "gemini-1.5-flash-latest",
+        model: str = "gemini-2.0-flash-001",
        params: InputParams = InputParams(),
        system_instruction: Optional[str] = None,
        tools: Optional[List[Dict[str, Any]]] = None,