Changing the example to use OpenAI

2026-05-15 15:03:32 +02:00
parent ee5aa4dc71
commit bc769eaa82
4 changed files with 59 additions and 33 deletions
--- a/examples/transports/transports-vonage.py
+++ b/examples/transports/transports-vonage.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-"""Example of using AWS Nova Sonic LLM service with Vonage Video Connector transport."""
+"""Example of using OpenAI Realtime voice LLM service with Vonage Video Connector transport."""

 import asyncio
 import os
@@ -17,16 +17,25 @@ from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
+from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineTask
+from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
 from pipecat.runner.vonage import configure
-from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
+    InputAudioNoiseReduction,
+    InputAudioTranscription,
+    SemanticTurnDetection,
+    SessionProperties,
+)
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
 from pipecat.transports.vonage.video_connector import (
    VonageVideoConnectorTransport,
    VonageVideoConnectorTransportParams,
@@ -39,15 +48,9 @@ logger.add(sys.stderr, level="DEBUG")


 async def main() -> None:
-    """Main entry point for the nova sonic vonage video connector example."""
+    """Main entry point for the OpenAI Realtime vonage video connector example."""
    (application_id, session_id, token) = await configure()

-    system_instruction = (
-        "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging "
-        "the transcripts of a natural real-time conversation. Keep your responses short, generally "
-        "two or three sentences for chatty scenarios. "
-        f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}"
-    )
    transport = VonageVideoConnectorTransport(
        application_id,
        session_id,
@@ -59,24 +62,41 @@ async def main() -> None:
        ),
    )

-    llm = AWSNovaSonicLLMService(
-        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", ""),
-        access_key_id=os.getenv("AWS_ACCESS_KEY_ID", ""),
-        region=os.getenv("AWS_REGION", ""),
-        session_token=os.getenv("AWS_SESSION_TOKEN", ""),
-        voice_id="tiffany",
+    llm = OpenAIRealtimeLLMService(
+        api_key=os.environ["OPENAI_API_KEY"],
+        settings=OpenAIRealtimeLLMService.Settings(
+            system_instruction="""You are a helpful and friendly AI.
+
+Act like a human, but remember that you aren't a human and that you can't do human
+things in the real world. Your voice and personality should be warm and engaging, with a lively and
+playful tone.
+
+If interacting in a non-English language, start by using the standard accent or dialect familiar to
+the user. Talk quickly.
+
+You are participating in a voice conversation. Keep your responses concise, short, and to the point
+unless specifically asked to elaborate on a topic.
+
+Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""",
+            session_properties=SessionProperties(
+                audio=AudioConfiguration(
+                    input=AudioInput(
+                        transcription=InputAudioTranscription(),
+                        turn_detection=SemanticTurnDetection(),
+                        noise_reduction=InputAudioNoiseReduction(type="near_field"),
+                    )
+                ),
+            ),
+        ),
    )
+
    context = LLMContext(
-        messages=[
-            {"role": "system", "content": f"{system_instruction}"},
-            {
-                "role": "user",
-                "content": "Tell me a fun fact!",
-            },
-        ],
+        [{"role": "developer", "content": "Say hello!"}],
    )
+
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
-        context, user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer())
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
@@ -89,19 +109,25 @@ async def main() -> None:
        ]
    )

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        observers=[TranscriptionLogObserver()],
+    )

-    # Handle client connection event
    event_handler: Callable[[str], Callable[[Any], Any]] = transport.event_handler

    @event_handler("on_client_connected")
    async def on_client_connected(transport: VonageVideoConnectorTransport, client: object) -> None:
-        logger.info(f"Client connected")
+        logger.info("Client connected")
        await task.queue_frames([LLMRunFrame()])

    runner = PipelineRunner()

-    await asyncio.gather(runner.run(task))
+    await runner.run(task)


 if __name__ == "__main__":
--- a/src/pipecat/runner/run.py
+++ b/src/pipecat/runner/run.py
@@ -991,14 +991,14 @@ async def _run_vonage():

    application_id, session_id, token = await configure_vonage()
    runner_args = VonageRunnerArguments(
-        application_id=application_id, session_id=session_id, token=token
+        application_id=application_id, vonage_session_id=session_id, token=token
    )
    runner_args.handle_sigint = True

    # Get the bot module and run it directly
    bot_module = _get_bot_module()

-    print(f"Joining Vonage session: {runner_args.session_id}")
+    print(f"Joining Vonage session: {runner_args.vonage_session_id}")
    print()

    await bot_module.bot(runner_args)
--- a/src/pipecat/runner/types.py
+++ b/src/pipecat/runner/types.py
@@ -105,12 +105,12 @@ class VonageRunnerArguments(RunnerArguments):

    Parameters:
        application_id: Vonage application ID
-        session_id: Vonage session ID
+        vonage_session_id: Vonage session ID
        token: Vonage Session Token
    """

    application_id: str
-    session_id: str
+    vonage_session_id: str
    token: str


--- a/src/pipecat/runner/utils.py
+++ b/src/pipecat/runner/utils.py
@@ -640,7 +640,7 @@ async def create_transport(

        return VonageVideoConnectorTransport(
            runner_args.application_id,
-            runner_args.session_id,
+            runner_args.vonage_session_id,
            runner_args.token,
            params=params,
        )