From bc769eaa82c42b6f177b744910afab5ef82004a9 Mon Sep 17 00:00:00 2001 From: asilvestre Date: Fri, 15 May 2026 15:03:32 +0200 Subject: [PATCH] Changing the example to use OpenAI --- examples/transports/transports-vonage.py | 82 ++++++++++++++++-------- src/pipecat/runner/run.py | 4 +- src/pipecat/runner/types.py | 4 +- src/pipecat/runner/utils.py | 2 +- 4 files changed, 59 insertions(+), 33 deletions(-) diff --git a/examples/transports/transports-vonage.py b/examples/transports/transports-vonage.py index 774ca1696..cf85d4a9a 100644 --- a/examples/transports/transports-vonage.py +++ b/examples/transports/transports-vonage.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: BSD 2-Clause License # -"""Example of using AWS Nova Sonic LLM service with Vonage Video Connector transport.""" +"""Example of using OpenAI Realtime voice LLM service with Vonage Video Connector transport.""" import asyncio import os @@ -17,16 +17,25 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineTask +from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import ( LLMContextAggregatorPair, LLMUserAggregatorParams, ) from pipecat.runner.vonage import configure -from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService +from pipecat.services.openai.realtime.events import ( + AudioConfiguration, + AudioInput, + InputAudioNoiseReduction, + InputAudioTranscription, + SemanticTurnDetection, + SessionProperties, +) +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService from pipecat.transports.vonage.video_connector import ( VonageVideoConnectorTransport, VonageVideoConnectorTransportParams, @@ -39,15 +48,9 @@ logger.add(sys.stderr, level="DEBUG") async def main() -> None: - """Main entry point for the nova sonic vonage video connector example.""" + """Main entry point for the OpenAI Realtime vonage video connector example.""" (application_id, session_id, token) = await configure() - system_instruction = ( - "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging " - "the transcripts of a natural real-time conversation. Keep your responses short, generally " - "two or three sentences for chatty scenarios. " - f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}" - ) transport = VonageVideoConnectorTransport( application_id, session_id, @@ -59,24 +62,41 @@ async def main() -> None: ), ) - llm = AWSNovaSonicLLMService( - secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", ""), - access_key_id=os.getenv("AWS_ACCESS_KEY_ID", ""), - region=os.getenv("AWS_REGION", ""), - session_token=os.getenv("AWS_SESSION_TOKEN", ""), - voice_id="tiffany", + llm = OpenAIRealtimeLLMService( + api_key=os.environ["OPENAI_API_KEY"], + settings=OpenAIRealtimeLLMService.Settings( + system_instruction="""You are a helpful and friendly AI. + +Act like a human, but remember that you aren't a human and that you can't do human +things in the real world. Your voice and personality should be warm and engaging, with a lively and +playful tone. + +If interacting in a non-English language, start by using the standard accent or dialect familiar to +the user. Talk quickly. + +You are participating in a voice conversation. Keep your responses concise, short, and to the point +unless specifically asked to elaborate on a topic. + +Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(), + turn_detection=SemanticTurnDetection(), + noise_reduction=InputAudioNoiseReduction(type="near_field"), + ) + ), + ), + ), ) + context = LLMContext( - messages=[ - {"role": "system", "content": f"{system_instruction}"}, - { - "role": "user", - "content": "Tell me a fun fact!", - }, - ], + [{"role": "developer", "content": "Say hello!"}], ) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( - context, user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()) + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), ) pipeline = Pipeline( @@ -89,19 +109,25 @@ async def main() -> None: ] ) - task = PipelineTask(pipeline) + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + observers=[TranscriptionLogObserver()], + ) - # Handle client connection event event_handler: Callable[[str], Callable[[Any], Any]] = transport.event_handler @event_handler("on_client_connected") async def on_client_connected(transport: VonageVideoConnectorTransport, client: object) -> None: - logger.info(f"Client connected") + logger.info("Client connected") await task.queue_frames([LLMRunFrame()]) runner = PipelineRunner() - await asyncio.gather(runner.run(task)) + await runner.run(task) if __name__ == "__main__": diff --git a/src/pipecat/runner/run.py b/src/pipecat/runner/run.py index 872dca5af..f654f110d 100644 --- a/src/pipecat/runner/run.py +++ b/src/pipecat/runner/run.py @@ -991,14 +991,14 @@ async def _run_vonage(): application_id, session_id, token = await configure_vonage() runner_args = VonageRunnerArguments( - application_id=application_id, session_id=session_id, token=token + application_id=application_id, vonage_session_id=session_id, token=token ) runner_args.handle_sigint = True # Get the bot module and run it directly bot_module = _get_bot_module() - print(f"Joining Vonage session: {runner_args.session_id}") + print(f"Joining Vonage session: {runner_args.vonage_session_id}") print() await bot_module.bot(runner_args) diff --git a/src/pipecat/runner/types.py b/src/pipecat/runner/types.py index bd39d71c6..ebee43842 100644 --- a/src/pipecat/runner/types.py +++ b/src/pipecat/runner/types.py @@ -105,12 +105,12 @@ class VonageRunnerArguments(RunnerArguments): Parameters: application_id: Vonage application ID - session_id: Vonage session ID + vonage_session_id: Vonage session ID token: Vonage Session Token """ application_id: str - session_id: str + vonage_session_id: str token: str diff --git a/src/pipecat/runner/utils.py b/src/pipecat/runner/utils.py index 34b66f63a..a9267b68b 100644 --- a/src/pipecat/runner/utils.py +++ b/src/pipecat/runner/utils.py @@ -640,7 +640,7 @@ async def create_transport( return VonageVideoConnectorTransport( runner_args.application_id, - runner_args.session_id, + runner_args.vonage_session_id, runner_args.token, params=params, )