From bc769eaa82c42b6f177b744910afab5ef82004a9 Mon Sep 17 00:00:00 2001
From: asilvestre <antoni.silvestrepadros@vonage.com>
Date: Fri, 15 May 2026 15:03:32 +0200
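Subject: [PATCH] Change the Vonage example to use OpenAI Realtime

Switch examples/transports/transports-vonage.py from the AWS Nova Sonic
LLM service to the OpenAI Realtime LLM service, and rename
VonageRunnerArguments.session_id to vonage_session_id in the runner.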

---
 examples/transports/transports-vonage.py | 82 ++++++++++++++++--------
 src/pipecat/runner/run.py                |  4 +-
 src/pipecat/runner/types.py              |  4 +-
 src/pipecat/runner/utils.py              |  2 +-
 4 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/examples/transports/transports-vonage.py b/examples/transports/transports-vonage.py
index 774ca1696..cf85d4a9a 100644
--- a/examples/transports/transports-vonage.py
+++ b/examples/transports/transports-vonage.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
-"""Example of using AWS Nova Sonic LLM service with Vonage Video Connector transport."""
+"""Example of using OpenAI Realtime voice LLM service with Vonage Video Connector transport."""
 
 import asyncio
 import os
@@ -17,16 +17,25 @@ from loguru import logger
 
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
+from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineTask
+from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
     LLMContextAggregatorPair,
     LLMUserAggregatorParams,
 )
 from pipecat.runner.vonage import configure
-from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
+    InputAudioNoiseReduction,
+    InputAudioTranscription,
+    SemanticTurnDetection,
+    SessionProperties,
+)
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
 from pipecat.transports.vonage.video_connector import (
     VonageVideoConnectorTransport,
     VonageVideoConnectorTransportParams,
@@ -39,15 +48,9 @@ logger.add(sys.stderr, level="DEBUG")
 
 
 async def main() -> None:
-    """Main entry point for the nova sonic vonage video connector example."""
+    """Main entry point for the OpenAI Realtime Vonage Video Connector example."""
     (application_id, session_id, token) = await configure()
 
-    system_instruction = (
-        "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging "
-        "the transcripts of a natural real-time conversation. Keep your responses short, generally "
-        "two or three sentences for chatty scenarios. "
-        f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}"
-    )
     transport = VonageVideoConnectorTransport(
         application_id,
         session_id,
@@ -59,24 +62,41 @@ async def main() -> None:
         ),
     )
 
-    llm = AWSNovaSonicLLMService(
-        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", ""),
-        access_key_id=os.getenv("AWS_ACCESS_KEY_ID", ""),
-        region=os.getenv("AWS_REGION", ""),
-        session_token=os.getenv("AWS_SESSION_TOKEN", ""),
-        voice_id="tiffany",
+    llm = OpenAIRealtimeLLMService(
+        api_key=os.environ["OPENAI_API_KEY"],
+        settings=OpenAIRealtimeLLMService.Settings(
+            system_instruction="""You are a helpful and friendly AI.
+
+Act like a human, but remember that you aren't a human and that you can't do human
+things in the real world. Your voice and personality should be warm and engaging, with a lively and
+playful tone.
+
+If interacting in a non-English language, start by using the standard accent or dialect familiar to
+the user. Talk quickly.
+
+You are participating in a voice conversation. Keep your responses concise, short, and to the point
+unless specifically asked to elaborate on a topic.
+
+Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""",
+            session_properties=SessionProperties(
+                audio=AudioConfiguration(
+                    input=AudioInput(
+                        transcription=InputAudioTranscription(),
+                        turn_detection=SemanticTurnDetection(),
+                        noise_reduction=InputAudioNoiseReduction(type="near_field"),
+                    )
+                ),
+            ),
+        ),
     )
+
     context = LLMContext(
-        messages=[
-            {"role": "system", "content": f"{system_instruction}"},
-            {
-                "role": "user",
-                "content": "Tell me a fun fact!",
-            },
-        ],
+        [{"role": "developer", "content": "Say hello!"}],
     )
+
     user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
-        context, user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer())
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
     )
 
     pipeline = Pipeline(
@@ -89,19 +109,25 @@ async def main() -> None:
         ]
     )
 
-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        observers=[TranscriptionLogObserver()],
+    )
 
-    # Handle client connection event
     event_handler: Callable[[str], Callable[[Any], Any]] = transport.event_handler
 
     @event_handler("on_client_connected")
     async def on_client_connected(transport: VonageVideoConnectorTransport, client: object) -> None:
-        logger.info(f"Client connected")
+        logger.info("Client connected")
         await task.queue_frames([LLMRunFrame()])
 
     runner = PipelineRunner()
 
-    await asyncio.gather(runner.run(task))
+    await runner.run(task)
 
 
 if __name__ == "__main__":
diff --git a/src/pipecat/runner/run.py b/src/pipecat/runner/run.py
index 872dca5af..f654f110d 100644
--- a/src/pipecat/runner/run.py
+++ b/src/pipecat/runner/run.py
@@ -991,14 +991,14 @@ async def _run_vonage():
 
     application_id, session_id, token = await configure_vonage()
     runner_args = VonageRunnerArguments(
-        application_id=application_id, session_id=session_id, token=token
+        application_id=application_id, vonage_session_id=session_id, token=token
     )
     runner_args.handle_sigint = True
 
     # Get the bot module and run it directly
     bot_module = _get_bot_module()
 
-    print(f"Joining Vonage session: {runner_args.session_id}")
+    print(f"Joining Vonage session: {runner_args.vonage_session_id}")
     print()
 
     await bot_module.bot(runner_args)
diff --git a/src/pipecat/runner/types.py b/src/pipecat/runner/types.py
index bd39d71c6..ebee43842 100644
--- a/src/pipecat/runner/types.py
+++ b/src/pipecat/runner/types.py
@@ -105,12 +105,12 @@ class VonageRunnerArguments(RunnerArguments):
 
     Parameters:
         application_id: Vonage application ID
-        session_id: Vonage session ID
+        vonage_session_id: Vonage session ID
         token: Vonage Session Token
     """
 
     application_id: str
-    session_id: str
+    vonage_session_id: str
     token: str
 
 
diff --git a/src/pipecat/runner/utils.py b/src/pipecat/runner/utils.py
index 34b66f63a..a9267b68b 100644
--- a/src/pipecat/runner/utils.py
+++ b/src/pipecat/runner/utils.py
@@ -640,7 +640,7 @@ async def create_transport(
 
         return VonageVideoConnectorTransport(
             runner_args.application_id,
-            runner_args.session_id,
+            runner_args.vonage_session_id,
             runner_args.token,
             params=params,
         )