diff --git a/CHANGELOG.md b/CHANGELOG.md index 2cda9f663..1f4405944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 and should be set manually from the serializer constructor if a different value is needed. +### Changed + +- Use `gemini-2.0-flash-001` as the default model for `GoogleLLMService`. + ## [0.0.55] - 2025-02-05 ### Added diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py index 57e1a5c98..d4c9bc9d7 100644 --- a/examples/foundational/07s-interruptible-google-audio-in.py +++ b/examples/foundational/07s-interruptible-google-audio-in.py @@ -216,11 +216,7 @@ async def main(): voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady ) - llm = GoogleLLMService( - model="gemini-1.5-flash-latest", - # model="gemini-exp-1114", - api_key=os.getenv("GOOGLE_API_KEY"), - ) + llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001") messages = [ { diff --git a/examples/foundational/12a-describe-video-gemini-flash.py b/examples/foundational/12a-describe-video-gemini-flash.py index c166fa376..05debe44b 100644 --- a/examples/foundational/12a-describe-video-gemini-flash.py +++ b/examples/foundational/12a-describe-video-gemini-flash.py @@ -72,9 +72,7 @@ async def main(): vision_aggregator = VisionImageFrameAggregator() - google = GoogleLLMService( - model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY") - ) + google = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY")) tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), diff --git a/examples/foundational/14e-function-calling-gemini.py b/examples/foundational/14e-function-calling-gemini.py index 28297d5ba..a05ad7f75 100644 --- a/examples/foundational/14e-function-calling-gemini.py +++ b/examples/foundational/14e-function-calling-gemini.py @@ -62,11 +62,7 
@@ async def main(): voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady ) - llm = GoogleLLMService( - model="gemini-1.5-flash-latest", - # model="gemini-exp-1114", - api_key=os.getenv("GOOGLE_API_KEY"), - ) + llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001") llm.register_function("get_weather", get_weather) llm.register_function("get_image", get_image) diff --git a/examples/foundational/20d-persistent-context-gemini.py b/examples/foundational/20d-persistent-context-gemini.py index 2841d23d0..1fb9d7b21 100644 --- a/examples/foundational/20d-persistent-context-gemini.py +++ b/examples/foundational/20d-persistent-context-gemini.py @@ -237,7 +237,7 @@ async def main(): voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady ) - llm = GoogleLLMService(model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")) + llm = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY")) # you can either register a single function for all function calls, or specific functions # llm.register_function(None, fetch_weather_from_api) diff --git a/examples/foundational/22d-natural-conversation-gemini-audio.py b/examples/foundational/22d-natural-conversation-gemini-audio.py index ffd7ca77c..6829f8370 100644 --- a/examples/foundational/22d-natural-conversation-gemini-audio.py +++ b/examples/foundational/22d-natural-conversation-gemini-audio.py @@ -55,13 +55,9 @@ load_dotenv(override=True) logger.remove(0) logger.add(sys.stderr, level="DEBUG") -# TRANSCRIBER_MODEL = "gemini-1.5-flash-latest" -# CLASSIFIER_MODEL = "gemini-1.5-flash-latest" -# CONVERSATION_MODEL = "gemini-1.5-flash-latest" - -TRANSCRIBER_MODEL = "gemini-2.0-flash-exp" -CLASSIFIER_MODEL = "gemini-2.0-flash-exp" -CONVERSATION_MODEL = "gemini-2.0-flash-exp" +TRANSCRIBER_MODEL = "gemini-2.0-flash-001" +CLASSIFIER_MODEL = "gemini-2.0-flash-001" +CONVERSATION_MODEL = "gemini-2.0-flash-001" transcriber_system_instruction = """You are an 
audio transcriber. You are receiving audio from a user. Your job is to transcribe the input audio to text exactly as it was said by the user. diff --git a/examples/foundational/25-google-audio-in.py b/examples/foundational/25-google-audio-in.py index 1d90294cd..a6108be09 100644 --- a/examples/foundational/25-google-audio-in.py +++ b/examples/foundational/25-google-audio-in.py @@ -292,7 +292,7 @@ async def main(): conversation_llm = GoogleLLMService( name="Conversation", - model="gemini-1.5-flash-latest", + model="gemini-2.0-flash-001", # model="gemini-exp-1121", api_key=os.getenv("GOOGLE_API_KEY"), # we can give the GoogleLLMService a system instruction to use directly @@ -303,7 +303,7 @@ async def main(): input_transcription_llm = GoogleLLMService( name="Transcription", - model="gemini-1.5-flash-latest", + model="gemini-2.0-flash-001", # model="gemini-exp-1121", api_key=os.getenv("GOOGLE_API_KEY"), system_instruction=transcriber_system_message, diff --git a/src/pipecat/services/google/google.py b/src/pipecat/services/google/google.py index 971bf61c0..1b0665d4f 100644 --- a/src/pipecat/services/google/google.py +++ b/src/pipecat/services/google/google.py @@ -639,7 +639,7 @@ class GoogleLLMService(LLMService): self, *, api_key: str, - model: str = "gemini-1.5-flash-latest", + model: str = "gemini-2.0-flash-001", params: InputParams = InputParams(), system_instruction: Optional[str] = None, tools: Optional[List[Dict[str, Any]]] = None,