diff --git a/examples/foundational/26d-gemini-live-text.py b/examples/foundational/26d-gemini-live-text.py deleted file mode 100644 index 05b3bdce9..000000000 --- a/examples/foundational/26d-gemini-live-text.py +++ /dev/null @@ -1,142 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService, GeminiModalities -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -SYSTEM_INSTRUCTION = f""" -"You are Gemini Chatbot, a friendly, helpful robot. - -Your goal is to demonstrate your capabilities in a succinct way. - -Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. - -Respond to what the user said in a creative and helpful way. Keep your responses brief. One or two sentences at most. -""" - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - # KNOWN ISSUE: If using GeminiLiveVertexLLMService, you cannot specify a - # modality other than AUDIO (at least not if using the service's default - # model, which is a native audio model: - # https://cloud.google.com/vertex-ai/generative-ai/docs/live-api/tools#native-audio). - llm = GeminiLiveLLMService( - api_key=os.getenv("GOOGLE_API_KEY"), - settings=GeminiLiveLLMService.Settings( - system_instruction=SYSTEM_INSTRUCTION, - modalities=GeminiModalities.TEXT, - ), - tools=[{"google_search": {}}, {"code_execution": {}}], - ) - - # Optionally, you can set the response modalities via a function - # llm.set_model_modalities( - # GeminiMultimodalModalities.TEXT - # ) - - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121" - ) - - messages = [ - { - "role": "developer", - "content": 'Start by saying "Hello, I\'m Gemini".', - }, - ] - - # Set up conversation context and management - # The context_aggregator will automatically collect conversation context - context = LLMContext(messages) - # Server-side VAD is enabled by default; no local VAD is added. - user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) - - pipeline = Pipeline( - [ - transport.input(), - user_aggregator, - llm, - tts, - transport.output(), - assistant_aggregator, - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index f58972ea4..544efdc77 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -787,6 +787,13 @@ class GeminiLiveLLMService(LLMService): if settings is not None: default_settings.apply_update(settings) + # Warn if user requested TEXT modality + if default_settings.modalities == GeminiModalities.TEXT: + logger.warning( + f"Modality {default_settings.modalities.value!r} may not be supported by recent " + "Gemini Live models." + ) + super().__init__( base_url=base_url, settings=default_settings, @@ -920,6 +927,10 @@ class GeminiLiveLLMService(LLMService): Args: modalities: The modalities to use for responses. """ + if modalities == GeminiModalities.TEXT: + logger.warning( + f"Modality {modalities.value!r} may not be supported by recent Gemini Live models." + ) self._settings.modalities = modalities def set_language(self, language: Language):