diff --git a/CHANGELOG.md b/CHANGELOG.md index 538b01f1e..f2280db9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- ⚠️ Updated `SmallWebRTCTransport` to align with how other transports handle + `on_client_disconnected`. Now, when the connection is closed and no reconnection + is attempted, `on_client_disconnected` is called instead of `on_client_closed`. The + `on_client_closed` callback is no longer used; use `on_client_disconnected` instead. + +- Check if `PipelineTask` has already been cancelled. + +- Don't raise an exception if an event handler is not registered. + - Upgraded `deepgram-sdk` to 4.1.0. - Updated `GoogleTTSService` to use Google's streaming TTS API. The default @@ -147,6 +156,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed a `DailyTransport` issue that did not allow capturing video frames if + framerate was greater than zero. + - Fixed a `DeegramSTTService` connection issue when the user provided their own `LiveOptions`. @@ -173,6 +185,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other +- It is now possible to run all (or most) foundational examples with multiple + transports. By default, they run with P2P (Peer-To-Peer) WebRTC so you can try + everything locally. You can also run them with Daily or even with a Twilio + phone number. + - Added foundation examples `07y-interruptible-minimax.py` and `07z-interruptible-sarvam.py`to show how to use the `MiniMaxHttpTTSService` and `SarvamTTSService`, respectively. 
diff --git a/examples/foundational/01-say-one-thing-piper.py b/examples/foundational/01-say-one-thing-piper.py index 2c6d6eebb..ad5ed2278 100644 --- a/examples/foundational/01-say-one-thing-piper.py +++ b/examples/foundational/01-say-one-thing-piper.py @@ -16,23 +16,25 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.piper.tts import PiperTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams(audio_out_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True), + "webrtc": lambda: TransportParams(audio_out_enabled=True), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session async with aiohttp.ClientSession() as session: @@ -47,7 +49,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()]) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -55,4 +57,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/01-say-one-thing-rime.py b/examples/foundational/01-say-one-thing-rime.py index 46efbb3cd..6d09a20d9 100644 --- a/examples/foundational/01-say-one-thing-rime.py +++ b/examples/foundational/01-say-one-thing-rime.py @@ -16,24 +16,25 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.rime.tts import RimeHttpTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams 
+from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams(audio_out_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True), + "webrtc": lambda: TransportParams(audio_out_enabled=True), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - ), - ) - # Create an HTTP session async with aiohttp.ClientSession() as session: tts = RimeHttpTTSService( @@ -49,7 +50,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()]) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -57,4 +58,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/01-say-one-thing.py b/examples/foundational/01-say-one-thing.py index fbbf23b6c..be501dec7 100644 --- a/examples/foundational/01-say-one-thing.py +++ b/examples/foundational/01-say-one-thing.py @@ -15,23 +15,25 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.cartesia.tts import CartesiaTTSService -from 
pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams(audio_out_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True), + "webrtc": lambda: TransportParams(audio_out_enabled=True), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), @@ -45,7 +47,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()]) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -53,4 +55,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/01c-fastpitch.py b/examples/foundational/01c-fastpitch.py index effed6f01..62e4ea1ec 
100644 --- a/examples/foundational/01c-fastpitch.py +++ b/examples/foundational/01c-fastpitch.py @@ -15,23 +15,25 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.riva.tts import FastPitchTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams(audio_out_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True), + "webrtc": lambda: TransportParams(audio_out_enabled=True), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY")) @@ -42,7 +44,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()]) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -50,4 +52,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/02-llm-say-one-thing.py b/examples/foundational/02-llm-say-one-thing.py index 3162ffef4..0e4675fba 100644 --- a/examples/foundational/02-llm-say-one-thing.py +++ b/examples/foundational/02-llm-say-one-thing.py @@ -16,23 +16,25 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams 
+from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams(audio_out_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True), + "webrtc": lambda: TransportParams(audio_out_enabled=True), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), @@ -55,7 +57,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): await task.queue_frames([LLMMessagesFrame(messages), EndFrame()]) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -63,4 +65,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/03-still-frame.py b/examples/foundational/03-still-frame.py index 3b7ef84e3..34c5da3b0 100644 --- a/examples/foundational/03-still-frame.py +++ b/examples/foundational/03-still-frame.py @@ -16,25 +16,31 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.services.fal.image import FalImageGenService -from pipecat.transports.base_transport import 
TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), + "webrtc": lambda: TransportParams( + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session async with aiohttp.ClientSession() as session: @@ -54,13 +60,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -68,4 +70,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, 
transport_params=transport_params) diff --git a/examples/foundational/03b-still-frame-imagen.py b/examples/foundational/03b-still-frame-imagen.py index 783fd9be8..7042bd078 100644 --- a/examples/foundational/03b-still-frame-imagen.py +++ b/examples/foundational/03b-still-frame-imagen.py @@ -15,25 +15,31 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.services.google.image import GoogleImageGenService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), + "webrtc": lambda: TransportParams( + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), +} - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") imagegen = GoogleImageGenService( api_key=os.getenv("GOOGLE_API_KEY"), @@ -54,13 +60,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -68,4 +70,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/04-transports-small-webrtc.py b/examples/foundational/04-transports-small-webrtc.py index 29ac495ea..c70b566d4 100644 --- a/examples/foundational/04-transports-small-webrtc.py +++ b/examples/foundational/04-transports-small-webrtc.py @@ -5,10 +5,17 @@ # import argparse +import asyncio import os +from contextlib import asynccontextmanager +from typing import Dict +import uvicorn from dotenv import load_dotenv +from fastapi import BackgroundTasks, FastAPI +from fastapi.responses import RedirectResponse from loguru import logger +from 
pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.pipeline.pipeline import Pipeline @@ -20,14 +27,29 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import TransportParams from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection load_dotenv(override=True) +app = FastAPI() -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# Store connections by pc_id +pcs_map: Dict[str, SmallWebRTCConnection] = {} + +ice_servers = [ + IceServer( + urls="stun:stun.l.google.com:19302", + ) +] + +# Mount the frontend at /client +app.mount("/client", SmallWebRTCPrebuiltUI) + + +async def run_example(webrtc_connection: SmallWebRTCConnection): logger.info(f"Starting bot") + # Create a transport using the WebRTC connection transport = SmallWebRTCTransport( webrtc_connection=webrtc_connection, params=TransportParams( @@ -88,10 +110,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() runner = PipelineRunner(handle_sigint=False) @@ -99,7 +117,58 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac await runner.run(task) -if __name__ == "__main__": - from run import main +@app.get("/", include_in_schema=False) +async def root_redirect(): + return RedirectResponse(url="/client/") - main() + +@app.post("/api/offer") +async def 
offer(request: dict, background_tasks: BackgroundTasks): + pc_id = request.get("pc_id") + + if pc_id and pc_id in pcs_map: + pipecat_connection = pcs_map[pc_id] + logger.info(f"Reusing existing connection for pc_id: {pc_id}") + await pipecat_connection.renegotiate( + sdp=request["sdp"], + type=request["type"], + restart_pc=request.get("restart_pc", False), + ) + else: + pipecat_connection = SmallWebRTCConnection(ice_servers) + await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"]) + + @pipecat_connection.event_handler("closed") + async def handle_disconnected(webrtc_connection: SmallWebRTCConnection): + logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}") + pcs_map.pop(webrtc_connection.pc_id, None) + + # Run example function with SmallWebRTC transport arguments. + background_tasks.add_task(run_example, pipecat_connection) + + answer = pipecat_connection.get_answer() + # Updating the peer connection inside the map + pcs_map[answer["pc_id"]] = pipecat_connection + + return answer + + +@asynccontextmanager +async def lifespan(app: FastAPI): + yield # Run app + coros = [pc.close() for pc in pcs_map.values()] + await asyncio.gather(*coros) + pcs_map.clear() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Pipecat Bot Runner") + parser.add_argument( + "--host", default="localhost", help="Host for HTTP server (default: localhost)" + ) + parser.add_argument( + "--port", type=int, default=7860, help="Port for HTTP server (default: 7860)" + ) + args = parser.parse_args() + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/examples/foundational/04b-transports-livekit.py b/examples/foundational/04b-transports-livekit.py index ef2679ab8..3cd5c7eea 100644 --- a/examples/foundational/04b-transports-livekit.py +++ b/examples/foundational/04b-transports-livekit.py @@ -10,7 +10,6 @@ import json import os import sys -import aiohttp from deepgram import LiveOptions from dotenv import 
load_dotenv from livekit import api @@ -104,101 +103,100 @@ async def configure_livekit(): async def main(): - async with aiohttp.ClientSession() as session: - (url, token, room_name) = await configure_livekit() + (url, token, room_name) = await configure_livekit() - transport = LiveKitTransport( - url=url, - token=token, - room_name=room_name, - params=LiveKitParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + transport = LiveKitTransport( + url=url, + token=token, + room_name=room_name, + params=LiveKitParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + ) - stt = DeepgramSTTService( - api_key=os.getenv("DEEPGRAM_API_KEY"), - live_options=LiveOptions( - vad_events=True, - ), - ) + stt = DeepgramSTTService( + api_key=os.getenv("DEEPGRAM_API_KEY"), + live_options=LiveOptions( + vad_events=True, + ), + ) - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ) + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. " - "Your goal is to demonstrate your capabilities in a succinct way. " - "Your output will be converted to audio so don't include special characters in your answers. " - "Respond to what the user said in a creative and helpful way.", - }, - ] + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. " + "Your goal is to demonstrate your capabilities in a succinct way. " + "Your output will be converted to audio so don't include special characters in your answers. 
" + "Respond to what the user said in a creative and helpful way.", + }, + ] - context = OpenAILLMContext(messages) - context_aggregator = llm.create_context_aggregator(context) + context = OpenAILLMContext(messages) + context_aggregator = llm.create_context_aggregator(context) - runner = PipelineRunner() + runner = PipelineRunner() - task = PipelineTask( - Pipeline( - [ - transport.input(), - stt, - context_aggregator.user(), - llm, - tts, - transport.output(), - context_aggregator.assistant(), - ], - ), - params=PipelineParams( - allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True - ), - ) + task = PipelineTask( + Pipeline( + [ + transport.input(), + stt, + context_aggregator.user(), + llm, + tts, + transport.output(), + context_aggregator.assistant(), + ], + ), + params=PipelineParams( + allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True + ), + ) - # Register an event handler so we can play the audio when the - # participant joins. - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant_id): - await asyncio.sleep(1) - await task.queue_frame( - TextFrame( - "Hello there! How are you doing today? Would you like to talk about the weather?" - ) + # Register an event handler so we can play the audio when the + # participant joins. + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant_id): + await asyncio.sleep(1) + await task.queue_frame( + TextFrame( + "Hello there! How are you doing today? Would you like to talk about the weather?" ) + ) - # Register an event handler to receive data from the participant via text chat - # in the LiveKit room. This will be used to as transcription frames and - # interrupt the bot and pass it to llm for processing and - # then pass back to the participant as audio output. 
- @transport.event_handler("on_data_received") - async def on_data_received(transport, data, participant_id): - logger.info(f"Received data from participant {participant_id}: {data}") - # convert data from bytes to string - json_data = json.loads(data) + # Register an event handler to receive data from the participant via text chat + # in the LiveKit room. This will be used to as transcription frames and + # interrupt the bot and pass it to llm for processing and + # then pass back to the participant as audio output. + @transport.event_handler("on_data_received") + async def on_data_received(transport, data, participant_id): + logger.info(f"Received data from participant {participant_id}: {data}") + # convert data from bytes to string + json_data = json.loads(data) - await task.queue_frames( - [ - BotInterruptionFrame(), - UserStartedSpeakingFrame(), - TranscriptionFrame( - user_id=participant_id, - timestamp=json_data["timestamp"], - text=json_data["message"], - ), - UserStoppedSpeakingFrame(), - ], - ) + await task.queue_frames( + [ + BotInterruptionFrame(), + UserStartedSpeakingFrame(), + TranscriptionFrame( + user_id=participant_id, + timestamp=json_data["timestamp"], + text=json_data["message"], + ), + UserStoppedSpeakingFrame(), + ], + ) - await runner.run(task) + await runner.run(task) if __name__ == "__main__": diff --git a/examples/foundational/04c-transports-daily-audio-source.py b/examples/foundational/04c-transports-daily-audio-source.py deleted file mode 100644 index 00cb8a603..000000000 --- a/examples/foundational/04c-transports-daily-audio-source.py +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import asyncio -import os -import sys - -import aiohttp -from daily_runner import configure -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.pipeline.pipeline import Pipeline -from 
pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions -from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.services.daily import DailyParams, DailyTransport - -load_dotenv(override=True) - -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - - -async def main(): - async with aiohttp.ClientSession() as session: - (room_url, token) = await configure(session) - - transport = DailyTransport( - room_url, - token, - "Respond bot", - DailyParams( - audio_in_enabled=True, - audio_in_passthrough=False, - audio_out_enabled=True, - audio_out_sample_rate=16000, - transcription_enabled=False, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - - stt = DeepgramSTTService( - api_key=os.getenv("DEEPGRAM_API_KEY"), - live_options=LiveOptions(language=Language.EN), - ) - - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ) - - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o") - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. 
Respond to what the user said in a creative and helpful way.", - }, - ] - - context = OpenAILLMContext(messages) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, - context_aggregator.user(), # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant spoken responses - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - report_only_initial_ttfb=True, - ), - ) - - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - await transport.capture_participant_audio(participant["id"]) - # Kick off the conversation. - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - - @transport.event_handler("on_participant_left") - async def on_participant_left(transport, participant, reason): - await task.cancel() - - runner = PipelineRunner() - - await runner.run(task) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/foundational/05-sync-speech-and-image.py b/examples/foundational/05-sync-speech-and-image.py index e91c49eb5..a87869d0b 100644 --- a/examples/foundational/05-sync-speech-and-image.py +++ b/examples/foundational/05-sync-speech-and-image.py @@ -28,9 +28,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaHttpTTSService from pipecat.services.fal.image import FalImageGenService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import 
SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -64,7 +63,26 @@ class MonthPrepender(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), + "webrtc": lambda: TransportParams( + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): """Run the Calendar Month Narration bot using WebRTC transport. Args: @@ -73,17 +91,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac """ logger.info(f"Starting bot") - # Create a transport using the WebRTC connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - ), - ) - # Create an HTTP session for API calls async with aiohttp.ClientSession() as session: llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -159,18 +166,14 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() # Run the pipeline - runner = 
PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/06-listen-and-respond.py b/examples/foundational/06-listen-and-respond.py index 921144e85..5bdcd4b77 100644 --- a/examples/foundational/06-listen-and-respond.py +++ b/examples/foundational/06-listen-and-respond.py @@ -26,9 +26,9 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -53,17 +53,30 @@ class MetricsLogger(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -117,17 +130,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/06a-image-sync.py b/examples/foundational/06a-image-sync.py index a9d4e16b5..baabd1a56 100644 --- a/examples/foundational/06a-image-sync.py +++ b/examples/foundational/06a-image-sync.py @@ -26,9 +26,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import 
TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -68,20 +67,31 @@ class ImageSyncAggregator(FrameProcessor): await self.push_frame(frame) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -139,17 +149,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - 
runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07-interruptible-cartesia-http.py b/examples/foundational/07-interruptible-cartesia-http.py index 957938df2..c0108d8f4 100644 --- a/examples/foundational/07-interruptible-cartesia-http.py +++ b/examples/foundational/07-interruptible-cartesia-http.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaHttpTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -88,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index 29ac495ea..a86651c85 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -88,13 +100,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = 
PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +110,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07b-interruptible-langchain.py b/examples/foundational/07b-interruptible-langchain.py index 0b352719f..de1b12fea 100644 --- a/examples/foundational/07b-interruptible-langchain.py +++ b/examples/foundational/07b-interruptible-langchain.py @@ -27,9 +27,9 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.frameworks.langchain import LangchainProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -43,17 +43,30 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory: return message_store[session_id] -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -120,13 +133,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -134,4 +143,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07c-interruptible-deepgram-vad.py b/examples/foundational/07c-interruptible-deepgram-vad.py index 945cdc447..53697283e 100644 --- a/examples/foundational/07c-interruptible-deepgram-vad.py +++ b/examples/foundational/07c-interruptible-deepgram-vad.py @@ -24,23 +24,34 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService( api_key=os.getenv("DEEPGRAM_API_KEY"), @@ -101,13 +112,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = 
PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -115,4 +122,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py index 2a707da4a..8eaea32e6 100644 --- a/examples/foundational/07c-interruptible-deepgram.py +++ b/examples/foundational/07c-interruptible-deepgram.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -85,13 +98,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -99,4 +108,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07d-interruptible-elevenlabs-http.py b/examples/foundational/07d-interruptible-elevenlabs-http.py index 9fadd2cf5..7f76dbdcd 100644 --- a/examples/foundational/07d-interruptible-elevenlabs-http.py +++ b/examples/foundational/07d-interruptible-elevenlabs-http.py @@ -19,24 +19,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService 
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session async with aiohttp.ClientSession() as session: @@ -92,13 +105,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport,
client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -106,4 +115,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07d-interruptible-elevenlabs.py b/examples/foundational/07d-interruptible-elevenlabs.py index 885a034c0..a8a004417 100644 --- a/examples/foundational/07d-interruptible-elevenlabs.py +++ b/examples/foundational/07d-interruptible-elevenlabs.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -88,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/07e-interruptible-playht-http.py index 5ac99640e..0662ea27a 100644 --- a/examples/foundational/07e-interruptible-playht-http.py +++ b/examples/foundational/07e-interruptible-playht-http.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.openai.llm import OpenAILLMService from pipecat.services.playht.tts import PlayHTHttpTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = PlayHTHttpTTSService( @@ -89,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -103,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07e-interruptible-playht.py b/examples/foundational/07e-interruptible-playht.py index 321a876c0..b5c96c689 100644 --- a/examples/foundational/07e-interruptible-playht.py +++ b/examples/foundational/07e-interruptible-playht.py @@ -19,25 +19,37 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.playht.tts import PlayHTTTSService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = PlayHTTTSService( @@ -91,13 +103,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -105,4 +113,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07f-interruptible-azure.py b/examples/foundational/07f-interruptible-azure.py index 5c2f42247..32f97fb2e 100644 --- a/examples/foundational/07f-interruptible-azure.py +++ b/examples/foundational/07f-interruptible-azure.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import 
OpenAILLMContext from pipecat.services.azure.llm import AzureLLMService from pipecat.services.azure.stt import AzureSTTService from pipecat.services.azure.tts import AzureTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = AzureSTTService( api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"), @@ -95,13 +107,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -109,4 +117,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07g-interruptible-openai.py b/examples/foundational/07g-interruptible-openai.py index c89baa068..203556e99 100644 --- a/examples/foundational/07g-interruptible-openai.py +++ b/examples/foundational/07g-interruptible-openai.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.openai.stt import OpenAISTTService from pipecat.services.openai.tts import OpenAITTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = OpenAISTTService( api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-transcribe", @@ -90,13 +102,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -104,4 +112,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07h-interruptible-openpipe.py b/examples/foundational/07h-interruptible-openpipe.py index 90daf9311..a73a78eb0 100644 --- a/examples/foundational/07h-interruptible-openpipe.py +++ b/examples/foundational/07h-interruptible-openpipe.py @@ -19,25 +19,37 @@ from pipecat.processors.aggregators.openai_llm_context 
import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openpipe.llm import OpenPipeLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -94,13 +106,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -108,4 +116,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07i-interruptible-xtts.py b/examples/foundational/07i-interruptible-xtts.py index 1ca56b865..0efe516f2 100644 --- a/examples/foundational/07i-interruptible-xtts.py +++ b/examples/foundational/07i-interruptible-xtts.py @@ -19,25 +19,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.xtts.tts import XTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - # Create an HTTP session async with aiohttp.ClientSession() as session: stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -92,13 +104,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -106,4 +114,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07j-interruptible-gladia.py b/examples/foundational/07j-interruptible-gladia.py index 757ae2697..38b0f506e 100644 --- a/examples/foundational/07j-interruptible-gladia.py +++ b/examples/foundational/07j-interruptible-gladia.py @@ -20,25 +20,37 @@ from 
pipecat.services.gladia.config import GladiaInputParams, LanguageConfig from pipecat.services.gladia.stt import GladiaSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = GladiaSTTService( api_key=os.getenv("GLADIA_API_KEY", ""), params=GladiaInputParams( @@ -97,17 +109,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client 
disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07k-interruptible-lmnt.py b/examples/foundational/07k-interruptible-lmnt.py index 7447f8257..f7fc4b8aa 100644 --- a/examples/foundational/07k-interruptible-lmnt.py +++ b/examples/foundational/07k-interruptible-lmnt.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.lmnt.tts import LmntTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = LmntTTSService(api_key=os.getenv("LMNT_API_KEY"), voice_id="morgan") @@ -85,13 +97,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -99,4 +107,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07l-interruptible-groq.py b/examples/foundational/07l-interruptible-groq.py index 869548274..d649ddce8 100644 --- a/examples/foundational/07l-interruptible-groq.py +++ b/examples/foundational/07l-interruptible-groq.py @@ -19,25 +19,37 @@ from 
pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.groq.llm import GroqLLMService from pipecat.services.groq.stt import GroqSTTService from pipecat.services.groq.tts import GroqTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY")) llm = GroqLLMService( @@ -89,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -103,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07m-interruptible-aws.py b/examples/foundational/07m-interruptible-aws.py index bbcfe7313..dbdd0ba40 100644 --- a/examples/foundational/07m-interruptible-aws.py +++ b/examples/foundational/07m-interruptible-aws.py @@ -17,25 +17,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.aws.llm import AWSBedrockLLMService from pipecat.services.aws.stt import AWSTranscribeSTTService from pipecat.services.aws.tts import AWSPollyTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = AWSTranscribeSTTService() tts = AWSPollyTTSService( @@ -92,13 +104,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -106,4 +114,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07n-interruptible-google.py b/examples/foundational/07n-interruptible-google.py index 36cb27193..30d09943a 100644 --- a/examples/foundational/07n-interruptible-google.py +++ b/examples/foundational/07n-interruptible-google.py @@ -19,25 +19,37 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.google.stt import 
GoogleSTTService from pipecat.services.google.tts import GoogleTTSService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = GoogleSTTService( params=GoogleSTTService.InputParams(languages=Language.EN_US), credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), @@ -93,13 +105,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - 
async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -107,4 +115,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07o-interruptible-assemblyai.py b/examples/foundational/07o-interruptible-assemblyai.py index 2b371be50..0e195fa57 100644 --- a/examples/foundational/07o-interruptible-assemblyai.py +++ b/examples/foundational/07o-interruptible-assemblyai.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.assemblyai.stt import AssemblyAISTTService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = AssemblyAISTTService( api_key=os.getenv("ASSEMBLYAI_API_KEY"), @@ -90,13 +103,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -104,4 +113,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07p-interruptible-krisp.py b/examples/foundational/07p-interruptible-krisp.py index baaef1852..57cc0910e 100644 --- a/examples/foundational/07p-interruptible-krisp.py +++ b/examples/foundational/07p-interruptible-krisp.py @@ -19,26 +19,40 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts 
import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + audio_in_filter=KrispFilter(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + audio_in_filter=KrispFilter(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + audio_in_filter=KrispFilter(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - audio_in_filter=KrispFilter(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en") @@ -87,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def 
on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -101,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07q-interruptible-rime-http.py b/examples/foundational/07q-interruptible-rime-http.py index 40fc6be5f..248214d1a 100644 --- a/examples/foundational/07q-interruptible-rime-http.py +++ b/examples/foundational/07q-interruptible-rime-http.py @@ -19,24 +19,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.rime.tts import RimeHttpTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session async with aiohttp.ClientSession() as session: @@ -93,13 +106,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -107,4 +116,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07q-interruptible-rime.py b/examples/foundational/07q-interruptible-rime.py index 27f678930..cdc083b2e 100644 --- a/examples/foundational/07q-interruptible-rime.py +++ b/examples/foundational/07q-interruptible-rime.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm 
import OpenAILLMService from pipecat.services.rime.tts import RimeTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = RimeTTSService( @@ -88,13 +100,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await 
task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +110,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07r-interruptible-riva-nim.py b/examples/foundational/07r-interruptible-riva-nim.py index ddb80181c..f93809d03 100644 --- a/examples/foundational/07r-interruptible-riva-nim.py +++ b/examples/foundational/07r-interruptible-riva-nim.py @@ -16,31 +16,39 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.nim.llm import NimLLMService -from pipecat.services.riva.stt import ( - ParakeetSTTService, - RivaSegmentedSTTService, - RivaSTTService, -) -from pipecat.services.riva.tts import FastPitchTTSService, RivaTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.services.riva.stt import RivaSTTService +from pipecat.services.riva.tts import RivaTTSService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = RivaSTTService(api_key=os.getenv("NVIDIA_API_KEY")) llm = NimLLMService(api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct") @@ -89,13 +97,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -103,4 +107,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py index 360a5d350..eeb4d6c48 100644 --- a/examples/foundational/07s-interruptible-google-audio-in.py +++ 
b/examples/foundational/07s-interruptible-google-audio-in.py @@ -32,9 +32,9 @@ from pipecat.processors.frame_processor import FrameProcessor from pipecat.services.google.llm import GoogleLLMService from pipecat.services.google.tts import GoogleTTSService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -191,17 +191,30 @@ class TanscriptionContextFixup(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001") @@ -261,13 +274,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -275,4 +284,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07t-interruptible-fish.py b/examples/foundational/07t-interruptible-fish.py index 58fb4cd61..48c02d33c 100644 --- a/examples/foundational/07t-interruptible-fish.py +++ b/examples/foundational/07t-interruptible-fish.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.fish.tts import FishAudioTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -88,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07u-interruptible-ultravox.py b/examples/foundational/07u-interruptible-ultravox.py index b1e2e3756..bedabe1e3 100644 --- a/examples/foundational/07u-interruptible-ultravox.py +++ b/examples/foundational/07u-interruptible-ultravox.py @@ -11,15 +11,14 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.ultravox.stt import UltravoxSTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -36,17 +35,30 @@ ultravox_processor = UltravoxSTTService( ) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = CartesiaTTSService( api_key=os.environ.get("CARTESIA_API_KEY"), @@ -77,13 +89,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -91,4 +99,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07v-interruptible-neuphonic-http.py b/examples/foundational/07v-interruptible-neuphonic-http.py index 24eafa2e5..39afa9987 100644 --- a/examples/foundational/07v-interruptible-neuphonic-http.py +++ b/examples/foundational/07v-interruptible-neuphonic-http.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from 
pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -88,13 +101,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +111,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07v-interruptible-neuphonic.py b/examples/foundational/07v-interruptible-neuphonic.py index 660925544..5049020d8 100644 --- a/examples/foundational/07v-interruptible-neuphonic.py +++ b/examples/foundational/07v-interruptible-neuphonic.py @@ -18,25 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.neuphonic.tts import NeuphonicTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = NeuphonicTTSService( @@ -88,13 +100,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -102,4 +110,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07w-interruptible-fal.py b/examples/foundational/07w-interruptible-fal.py index 754aac7e9..bd967aa10 100644 --- a/examples/foundational/07w-interruptible-fal.py +++ b/examples/foundational/07w-interruptible-fal.py @@ -18,24 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import 
OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.fal.stt import FalSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = FalSTTService( api_key=os.getenv("FAL_KEY"), @@ -90,13 +103,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -104,4 +113,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07y-interruptible-minimax.py b/examples/foundational/07y-interruptible-minimax.py index 5add114bc..b8c9ded40 100644 --- a/examples/foundational/07y-interruptible-minimax.py +++ b/examples/foundational/07y-interruptible-minimax.py @@ -20,27 +20,40 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.minimax.tts import MiniMaxHttpTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") # Create an HTTP session async with aiohttp.ClientSession() as session: - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = MiniMaxHttpTTSService( @@ -94,13 +107,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -108,4 +117,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/07z-interruptible-sarvam.py b/examples/foundational/07z-interruptible-sarvam.py index fafee5e93..69a233991 100644 --- a/examples/foundational/07z-interruptible-sarvam.py +++ b/examples/foundational/07z-interruptible-sarvam.py @@ -20,24 +20,38 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.openai.llm import OpenAILLMService from pipecat.services.sarvam.tts import SarvamTTSService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) # Create an HTTP session async with aiohttp.ClientSession() as session: stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -92,13 +106,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -106,4 +116,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/09-mirror.py b/examples/foundational/09-mirror.py index 2a62b5a88..406293351 100644 --- a/examples/foundational/09-mirror.py +++ b/examples/foundational/09-mirror.py @@ -20,9 +20,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -47,21 +46,33 @@ class MirrorProcessor(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - video_out_width=1280, - video_out_height=720, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()]) @@ -77,13 +88,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -91,4 +98,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/09a-local-mirror.py b/examples/foundational/09a-local-mirror.py index 489f68888..06855bbb7 100644 --- a/examples/foundational/09a-local-mirror.py +++ b/examples/foundational/09a-local-mirror.py @@ -22,10 +22,9 @@ from pipecat.pipeline.pipeline import Pipeline from 
pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.transports.base_transport import TransportParams +from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -50,21 +49,33 @@ class MirrorProcessor(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), +} - p2p_transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - video_out_width=1280, - video_out_height=720, - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tk_root = tk.Tk() tk_root.title("Local Mirror") @@ -80,11 +91,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac ), ) - @p2p_transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - - pipeline = Pipeline([p2p_transport.input(), MirrorProcessor(), tk_transport.output()]) + pipeline = Pipeline([transport.input(), MirrorProcessor(), tk_transport.output()]) task = PipelineTask( pipeline, @@ -97,7 +104,16 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac tk_root.update_idletasks() await asyncio.sleep(0.1) - runner = PipelineRunner(handle_sigint=False) + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=handle_sigint) await asyncio.gather(runner.run(task), run_tk()) @@ -105,4 
+121,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/10-wake-phrase.py b/examples/foundational/10-wake-phrase.py index 66b4045d6..7851687cc 100644 --- a/examples/foundational/10-wake-phrase.py +++ b/examples/foundational/10-wake-phrase.py @@ -20,25 +20,37 @@ from pipecat.processors.filters.wake_check_filter import WakeCheckFilter from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -84,13 +96,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -98,4 +106,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/11-sound-effects.py b/examples/foundational/11-sound-effects.py index d896ddfb9..02ab18341 100644 --- a/examples/foundational/11-sound-effects.py +++ b/examples/foundational/11-sound-effects.py @@ -30,9 +30,9 @@ from pipecat.processors.logger import FrameLogger from pipecat.services.cartesia.tts import 
CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -78,17 +78,30 @@ class InboundSoundEffectWrapper(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -141,13 +154,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): 
logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -155,4 +164,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/12-describe-video.py b/examples/foundational/12-describe-video.py index 3afd6dd0c..276508873 100644 --- a/examples/foundational/12-describe-video.py +++ b/examples/foundational/12-describe-video.py @@ -10,6 +10,7 @@ from typing import Optional from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame @@ -22,9 +23,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.moondream.vision import MoondreamService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -47,21 +47,27 @@ class UserImageRequester(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - # Get WebRTC peer connection ID - webrtc_peer_id = webrtc_connection.pc_id 
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") user_response = UserResponseAggregator() @@ -99,22 +105,21 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - # Welcome message - await tts.say("Hi there! Feel free to ask me what I see.") + await maybe_capture_participant_video(transport, client) # Set the participant ID in the image requester - image_requester.set_participant_id(webrtc_peer_id) + client_id = get_transport_client_id(transport, client) + image_requester.set_participant_id(client_id) + + # Welcome message + await tts.say("Hi there! 
Feel free to ask me what I see.") @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -122,4 +127,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/12a-describe-video-gemini-flash.py b/examples/foundational/12a-describe-video-gemini-flash.py index f81655298..1e3d9d494 100644 --- a/examples/foundational/12a-describe-video-gemini-flash.py +++ b/examples/foundational/12a-describe-video-gemini-flash.py @@ -10,6 +10,7 @@ from typing import Optional from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame @@ -22,9 +23,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -47,21 +47,27 @@ class UserImageRequester(FrameProcessor): await 
self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - # Get WebRTC peer connection ID - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") user_response = UserResponseAggregator() @@ -102,22 +108,21 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - # Welcome message - await tts.say("Hi there! Feel free to ask me what I see.") + await maybe_capture_participant_video(transport, client) # Set the participant ID in the image requester - image_requester.set_participant_id(webrtc_peer_id) + client_id = get_transport_client_id(transport, client) + image_requester.set_participant_id(client_id) + + # Welcome message + await tts.say("Hi there! 
Feel free to ask me what I see.") @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +130,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/12b-describe-video-gpt-4o.py b/examples/foundational/12b-describe-video-gpt-4o.py index 00d64863c..d912ee91b 100644 --- a/examples/foundational/12b-describe-video-gpt-4o.py +++ b/examples/foundational/12b-describe-video-gpt-4o.py @@ -10,6 +10,7 @@ from typing import Optional from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame @@ -22,9 +23,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -47,21 +47,27 @@ class UserImageRequester(FrameProcessor): await self.push_frame(frame, 
direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - # Get WebRTC peer connection ID - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") user_response = UserResponseAggregator() @@ -102,22 +108,21 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - # Welcome message - await tts.say("Hi there! Feel free to ask me what I see.") + await maybe_capture_participant_video(transport, client) # Set the participant ID in the image requester - image_requester.set_participant_id(webrtc_peer_id) + client_id = get_transport_client_id(transport, client) + image_requester.set_participant_id(client_id) + + # Welcome message + await tts.say("Hi there! 
Feel free to ask me what I see.") @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +130,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/12c-describe-video-anthropic.py b/examples/foundational/12c-describe-video-anthropic.py index cc454d2e1..bf921df83 100644 --- a/examples/foundational/12c-describe-video-anthropic.py +++ b/examples/foundational/12c-describe-video-anthropic.py @@ -10,6 +10,7 @@ from typing import Optional from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame @@ -22,9 +23,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -47,21 +47,27 @@ class UserImageRequester(FrameProcessor): await 
self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - # Get WebRTC peer connection ID - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") user_response = UserResponseAggregator() @@ -102,22 +108,21 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - # Welcome message - await tts.say("Hi there! Feel free to ask me what I see.") + await maybe_capture_participant_video(transport, client) # Set the participant ID in the image requester - image_requester.set_participant_id(webrtc_peer_id) + client_id = get_transport_client_id(transport, client) + image_requester.set_participant_id(client_id) + + # Welcome message + await tts.say("Hi there! 
Feel free to ask me what I see.") @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +130,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13-whisper-transcription.py b/examples/foundational/13-whisper-transcription.py index 1cee7a00b..003d9ebb2 100644 --- a/examples/foundational/13-whisper-transcription.py +++ b/examples/foundational/13-whisper-transcription.py @@ -16,9 +16,9 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.whisper.stt import WhisperSTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -31,16 +31,27 @@ class TranscriptionLogger(FrameProcessor): print(f"Transcription: {frame.text}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. 
The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = WhisperSTTService() @@ -53,13 +64,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -67,4 +74,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13b-deepgram-transcription.py b/examples/foundational/13b-deepgram-transcription.py index 4c7e75dc1..2318041cb 100644 --- a/examples/foundational/13b-deepgram-transcription.py +++ b/examples/foundational/13b-deepgram-transcription.py @@ -16,9 +16,9 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.deepgram.stt import DeepgramSTTService, 
Language, LiveOptions -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -31,13 +31,18 @@ class TranscriptionLogger(FrameProcessor): print(f"Transcription: {frame.text}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams(audio_in_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True), + "webrtc": lambda: TransportParams(audio_in_enabled=True), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams(audio_in_enabled=True), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService( api_key=os.getenv("DEEPGRAM_API_KEY"), @@ -53,13 +58,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -67,4 +68,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: 
argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13c-gladia-transcription.py b/examples/foundational/13c-gladia-transcription.py index a0a6264bf..14237a5f0 100644 --- a/examples/foundational/13c-gladia-transcription.py +++ b/examples/foundational/13c-gladia-transcription.py @@ -16,9 +16,9 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.gladia import GladiaSTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -31,13 +31,18 @@ class TranscriptionLogger(FrameProcessor): print(f"Transcription: {frame.text}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams(audio_in_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True), + "webrtc": lambda: TransportParams(audio_in_enabled=True), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams(audio_in_enabled=True), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = GladiaSTTService( api_key=os.getenv("GLADIA_API_KEY"), @@ -53,13 +58,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -67,4 +68,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13c-gladia-translation.py b/examples/foundational/13c-gladia-translation.py index 0c821df5d..0deb03c71 100644 --- a/examples/foundational/13c-gladia-translation.py +++ b/examples/foundational/13c-gladia-translation.py @@ -23,9 +23,9 @@ from pipecat.services.gladia.config import ( ) from pipecat.services.gladia.stt import GladiaSTTService from pipecat.transcriptions.language import Language -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -40,13 +40,18 @@ class TranscriptionLogger(FrameProcessor): print(f"Translation ({frame.language}): {frame.text}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams(audio_in_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True), + "webrtc": lambda: TransportParams(audio_in_enabled=True), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams(audio_in_enabled=True), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = GladiaSTTService( api_key=os.getenv("GLADIA_API_KEY"), @@ -74,13 +79,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -88,4 +89,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13d-assemblyai-transcription.py b/examples/foundational/13d-assemblyai-transcription.py index 8fa99d8de..0a803d7d3 100644 --- 
a/examples/foundational/13d-assemblyai-transcription.py +++ b/examples/foundational/13d-assemblyai-transcription.py @@ -16,9 +16,9 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.assemblyai.stt import AssemblyAISTTService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -31,13 +31,18 @@ class TranscriptionLogger(FrameProcessor): print(f"Transcription: {frame.text}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams(audio_in_enabled=True), + "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True), + "webrtc": lambda: TransportParams(audio_in_enabled=True), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams(audio_in_enabled=True), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = AssemblyAISTTService( api_key=os.getenv("ASSEMBLYAI_API_KEY"), @@ -52,13 +57,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -66,4 +67,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/13e-whisper-mlx.py b/examples/foundational/13e-whisper-mlx.py index 9ab7bc82b..f8db6b924 100644 --- a/examples/foundational/13e-whisper-mlx.py +++ b/examples/foundational/13e-whisper-mlx.py @@ -18,9 +18,9 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from 
pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -52,16 +52,27 @@ class TranscriptionLogger(FrameProcessor): self._last_transcription_time = time.time() -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = WhisperSTTServiceMLX(model=MLXModel.LARGE_V3_TURBO) @@ -80,13 +91,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -94,4 
+101,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14-function-calling.py b/examples/foundational/14-function-calling.py index ab51a6050..64d2c4e4f 100644 --- a/examples/foundational/14-function-calling.py +++ b/examples/foundational/14-function-calling.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -118,13 +131,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -132,4 +141,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14a-function-calling-anthropic.py b/examples/foundational/14a-function-calling-anthropic.py index 38d4aed2f..7cfd5bce5 100644 --- a/examples/foundational/14a-function-calling-anthropic.py +++ b/examples/foundational/14a-function-calling-anthropic.py @@ -21,9 +21,9 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from 
pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -33,17 +33,30 @@ async def get_weather(params: FunctionCallParams): await params.result_callback(f"The weather in {location} is currently 72 degrees and sunny.") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -111,13 +124,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +134,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14b-function-calling-anthropic-video.py b/examples/foundational/14b-function-calling-anthropic-video.py index c18f7bf48..660d8fb96 100644 --- a/examples/foundational/14b-function-calling-anthropic-video.py +++ b/examples/foundational/14b-function-calling-anthropic-video.py @@ -10,6 +10,7 @@ import os from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, 
maybe_capture_participant_video from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema @@ -22,15 +23,14 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -# Global variable to store the peer connection ID -webrtc_peer_id = "" +# Global variable to store the client ID +client_id = "" async def get_weather(params: FunctionCallParams): @@ -40,11 +40,11 @@ async def get_weather(params: FunctionCallParams): async def get_image(params: FunctionCallParams): question = params.arguments["question"] - logger.debug(f"Requesting image with user_id={webrtc_peer_id}, question={question}") + logger.debug(f"Requesting image with user_id={client_id}, question={question}") # Request the image frame await params.llm.request_image_frame( - user_id=webrtc_peer_id, + user_id=client_id, function_name=params.function_name, tool_call_id=params.tool_call_id, text_content=question, @@ -59,21 +59,27 @@ async def get_image(params: FunctionCallParams): ) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - global webrtc_peer_id - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -174,19 +180,21 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") + + await maybe_capture_participant_video(transport, client) + + global client_id + client_id = get_transport_client_id(transport, client) + # Kick off the conversation. await task.queue_frames([context_aggregator.user().get_context_frame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -194,4 +202,4 @@ If you need to use a tool, simply use the tool. 
Do not tell the user the tool yo if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14c-function-calling-together.py b/examples/foundational/14c-function-calling-together.py index da79b68b0..538af5737 100644 --- a/examples/foundational/14c-function-calling-together.py +++ b/examples/foundational/14c-function-calling-together.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.together.llm import TogetherLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -111,13 +124,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +134,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14d-function-calling-video.py b/examples/foundational/14d-function-calling-video.py index 18772b609..034171808 100644 --- a/examples/foundational/14d-function-calling-video.py +++ b/examples/foundational/14d-function-calling-video.py @@ -10,6 +10,7 @@ import os from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from 
pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema @@ -22,15 +23,14 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -# Global variable to store the peer connection ID -webrtc_peer_id = "" +# Global variable to store the client ID +client_id = "" async def get_weather(params: FunctionCallParams): @@ -40,11 +40,11 @@ async def get_weather(params: FunctionCallParams): async def get_image(params: FunctionCallParams): question = params.arguments["question"] - logger.debug(f"Requesting image with user_id={webrtc_peer_id}, question={question}") + logger.debug(f"Requesting image with user_id={client_id}, question={question}") # Request the image frame await params.llm.request_image_frame( - user_id=webrtc_peer_id, + user_id=client_id, function_name=params.function_name, tool_call_id=params.tool_call_id, text_content=question, @@ -59,21 +59,27 @@ async def get_image(params: FunctionCallParams): ) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - global webrtc_peer_id - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -157,19 +163,21 @@ indicate you should use the get_image tool are: @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + + await maybe_capture_participant_video(transport, client) + + global client_id + client_id = get_transport_client_id(transport, client) + # Kick off the conversation. 
await task.queue_frames([context_aggregator.user().get_context_frame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -177,4 +185,4 @@ indicate you should use the get_image tool are: if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14e-function-calling-gemini.py b/examples/foundational/14e-function-calling-gemini.py index 9725e7968..40d414f87 100644 --- a/examples/foundational/14e-function-calling-gemini.py +++ b/examples/foundational/14e-function-calling-gemini.py @@ -10,6 +10,7 @@ import os from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema @@ -23,15 +24,14 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -# Global variable to store the peer connection ID -webrtc_peer_id = "" +# Global variable to store the client ID 
+client_id = "" async def get_weather(params: FunctionCallParams): @@ -42,11 +42,11 @@ async def get_weather(params: FunctionCallParams): async def get_image(params: FunctionCallParams): question = params.arguments["question"] - logger.debug(f"Requesting image with user_id={webrtc_peer_id}, question={question}") + logger.debug(f"Requesting image with user_id={client_id}, question={question}") # Request the image frame await params.llm.request_image_frame( - user_id=webrtc_peer_id, + user_id=client_id, function_name=params.function_name, tool_call_id=params.tool_call_id, text_content=question, @@ -61,21 +61,27 @@ async def get_image(params: FunctionCallParams): ) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - global webrtc_peer_id - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -167,19 +173,21 @@ indicate you should use the get_image tool are: @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") + + 
await maybe_capture_participant_video(transport, client) + + global client_id + client_id = get_transport_client_id(transport, client) + # Kick off the conversation. await task.queue_frames([context_aggregator.user().get_context_frame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -187,4 +195,4 @@ indicate you should use the get_image tool are: if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14f-function-calling-groq.py b/examples/foundational/14f-function-calling-groq.py index ee6a9a855..c0099697b 100644 --- a/examples/foundational/14f-function-calling-groq.py +++ b/examples/foundational/14f-function-calling-groq.py @@ -23,9 +23,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.groq.llm import GroqLLMService from pipecat.services.groq.stt import GroqSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -35,17 +35,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def 
run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY"), model="distil-whisper-large-v3-en") @@ -120,13 +133,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -134,4 +143,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14g-function-calling-grok.py b/examples/foundational/14g-function-calling-grok.py index 63d625bec..d9643e0ad 100644 --- 
a/examples/foundational/14g-function-calling-grok.py +++ b/examples/foundational/14g-function-calling-grok.py @@ -21,9 +21,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.grok.llm import GrokLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -32,17 +32,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -113,13 +126,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -127,4 +136,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14h-function-calling-azure.py b/examples/foundational/14h-function-calling-azure.py index 6c7bda50f..3f79f30ef 100644 --- a/examples/foundational/14h-function-calling-azure.py +++ b/examples/foundational/14h-function-calling-azure.py @@ -22,9 +22,9 @@ from pipecat.services.azure.llm import AzureLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import 
DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -119,13 +132,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client 
disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -133,4 +142,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14i-function-calling-fireworks.py b/examples/foundational/14i-function-calling-fireworks.py index e970e8213..13ed83b32 100644 --- a/examples/foundational/14i-function-calling-fireworks.py +++ b/examples/foundational/14i-function-calling-fireworks.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.fireworks.llm import FireworksLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -118,13 +131,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -132,4 +141,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14j-function-calling-nim.py b/examples/foundational/14j-function-calling-nim.py index a07109753..e5e663ec8 100644 --- a/examples/foundational/14j-function-calling-nim.py +++ b/examples/foundational/14j-function-calling-nim.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import 
FunctionCallParams from pipecat.services.nim.llm import NimLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -116,13 +129,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") 
- - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -130,4 +139,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14k-function-calling-cerebras.py b/examples/foundational/14k-function-calling-cerebras.py index 1016b8146..b30bd9ca7 100644 --- a/examples/foundational/14k-function-calling-cerebras.py +++ b/examples/foundational/14k-function-calling-cerebras.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.cerebras.llm import CerebrasLLMService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -126,13 +139,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -140,4 +149,4 @@ Start by asking me for my location. 
Then, use 'get_weather_current' to give me a if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14l-function-calling-deepseek.py b/examples/foundational/14l-function-calling-deepseek.py index a5e3f814c..6d352835e 100644 --- a/examples/foundational/14l-function-calling-deepseek.py +++ b/examples/foundational/14l-function-calling-deepseek.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepseek.llm import DeepSeekLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -126,13 +139,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -140,4 +149,4 @@ Start by asking me for my location. 
Then, use 'get_weather_current' to give me a if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14m-function-calling-openrouter.py b/examples/foundational/14m-function-calling-openrouter.py index 68b3973c5..fd612aa92 100644 --- a/examples/foundational/14m-function-calling-openrouter.py +++ b/examples/foundational/14m-function-calling-openrouter.py @@ -22,9 +22,9 @@ from pipecat.services.azure.tts import AzureTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openrouter.llm import OpenRouterLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -120,13 +133,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -134,4 +143,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py index 168441611..8539dc2db 100644 --- a/examples/foundational/14n-function-calling-perplexity.py +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -25,25 +25,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from 
pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.perplexity.llm import PerplexityLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -94,13 +106,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -108,4 +116,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py index 15077ba0f..08d82eee3 100644 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ b/examples/foundational/14o-function-calling-gemini-openai-format.py @@ -22,9 +22,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.google.llm_openai import GoogleLLMOpenAIBetaService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -115,13 +128,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -129,4 +138,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14p-function-calling-gemini-vertex-ai.py b/examples/foundational/14p-function-calling-gemini-vertex-ai.py index cc152f837..86307e5a4 100644 --- a/examples/foundational/14p-function-calling-gemini-vertex-ai.py +++ b/examples/foundational/14p-function-calling-gemini-vertex-ai.py @@ -22,9 +22,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService 
from pipecat.services.google.llm_vertex import GoogleVertexLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -54,10 +67,10 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac ) llm = GoogleVertexLLMService( - # credentials="", + credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"), params=GoogleVertexLLMService.InputParams( project_id="", - ) + ), ) # You can aslo register a function_name of None to get all functions # sent to the same callback with an additional function_name parameter. 
@@ -121,13 +134,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -135,4 +144,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14q-function-calling-qwen.py b/examples/foundational/14q-function-calling-qwen.py index 45e0b6463..520e675e0 100644 --- a/examples/foundational/14q-function-calling-qwen.py +++ b/examples/foundational/14q-function-calling-qwen.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.qwen.llm import QwenLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -34,17 +34,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We 
store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -118,13 +131,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -132,4 +141,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/14r-function-calling-aws.py b/examples/foundational/14r-function-calling-aws.py index cf4859576..923fb3423 100644 --- a/examples/foundational/14r-function-calling-aws.py +++ b/examples/foundational/14r-function-calling-aws.py @@ -21,9 +21,9 @@ from 
pipecat.services.aws.llm import AWSBedrockLLMService from pipecat.services.aws.stt import AWSTranscribeSTTService from pipecat.services.aws.tts import AWSPollyTTSService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -32,17 +32,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = AWSTranscribeSTTService() @@ -122,13 +135,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -136,4 +145,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/15-switch-voices.py b/examples/foundational/15-switch-voices.py index 6b195722d..44372b120 100644 --- a/examples/foundational/15-switch-voices.py +++ b/examples/foundational/15-switch-voices.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService 
-from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -54,17 +54,30 @@ async def barbershop_man_filter(frame) -> bool: return current_voice == "Barbershop Man" -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -151,13 +164,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -165,4 +174,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/15a-switch-languages.py b/examples/foundational/15a-switch-languages.py index 93610841d..fb7a401e8 100644 --- a/examples/foundational/15a-switch-languages.py +++ b/examples/foundational/15a-switch-languages.py @@ -23,9 +23,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -49,17 +49,30 @@ async def spanish_filter(frame) -> bool: return current_language == "Spanish" -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService( api_key=os.getenv("DEEPGRAM_API_KEY"), live_options=LiveOptions(language="multi") @@ -139,13 +152,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -153,4 +162,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/16-gpu-container-local-bot.py b/examples/foundational/16-gpu-container-local-bot.py index d5e560010..2b6f18576 100644 --- a/examples/foundational/16-gpu-container-local-bot.py +++ b/examples/foundational/16-gpu-container-local-bot.py @@ -18,26 +18,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.deepgram.stt import 
DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection -from pipecat.transports.services.daily import DailyTransportMessageFrame +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams, DailyTransportMessageFrame load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = DeepgramTTSService( @@ -124,13 +135,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client 
disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -138,4 +145,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/17-detect-user-idle.py b/examples/foundational/17-detect-user-idle.py index 96189805e..66bc2be52 100644 --- a/examples/foundational/17-detect-user-idle.py +++ b/examples/foundational/17-detect-user-idle.py @@ -20,25 +20,37 @@ from pipecat.processors.user_idle_processor import UserIdleProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -121,13 +133,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -135,4 +143,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/18-gstreamer-filesrc.py b/examples/foundational/18-gstreamer-filesrc.py index 92dcf973e..123b76e07 100644 --- a/examples/foundational/18-gstreamer-filesrc.py +++ b/examples/foundational/18-gstreamer-filesrc.py @@ -13,26 +13,35 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner 
import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, args: argparse.Namespace): - logger.info(f"Starting bot with video input: {args.input}") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), + "webrtc": lambda: TransportParams( + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_out_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - video_out_width=1280, - video_out_height=720, - ), - ) + +async def run_example(transport: BaseTransport, args: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot with video input: {args.input}") gst = GStreamerPipelineSource( pipeline=f"filesrc location={args.input}", @@ -51,7 +60,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, args: argparse.Names task = PipelineTask(pipeline) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -62,4 +71,4 @@ if __name__ == "__main__": parser = 
argparse.ArgumentParser(description="Pipecat Bot Runner") parser.add_argument("-i", "--input", type=str, required=True, help="Input video file") - main(parser) + main(run_example, parser=parser, transport_params=transport_params) diff --git a/examples/foundational/18a-gstreamer-videotestsrc.py b/examples/foundational/18a-gstreamer-videotestsrc.py index ece124667..f5cf64d60 100644 --- a/examples/foundational/18a-gstreamer-videotestsrc.py +++ b/examples/foundational/18a-gstreamer-videotestsrc.py @@ -13,27 +13,33 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), + "webrtc": lambda: TransportParams( + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot with video test source") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - video_out_width=1280, - video_out_height=720, - ), - ) - gst = GStreamerPipelineSource( pipeline='videotestsrc ! capsfilter caps="video/x-raw,width=1280,height=720,framerate=30/1"', out_params=GStreamerPipelineSource.OutputParams( @@ -50,7 +56,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac task = PipelineTask(pipeline) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -58,4 +64,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py index 17b5462c0..307eb11d3 100644 --- a/examples/foundational/19-openai-realtime-beta.py +++ b/examples/foundational/19-openai-realtime-beta.py @@ -14,7 +14,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline 
from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -27,9 +26,9 @@ from pipecat.services.openai_realtime_beta import ( SemanticTurnDetection, SessionProperties, ) -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -67,17 +66,30 @@ weather_function = FunctionSchema( tools = ToolsSchema(standard_tools=[weather_function]) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") session_properties = SessionProperties( input_audio_transcription=InputAudioTranscription(), @@ -163,13 +175,9 @@ Remember, your responses should be short. 
Just one or two sentences, usually.""" @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -177,4 +185,4 @@ Remember, your responses should be short. Just one or two sentences, usually.""" if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/19a-azure-realtime-beta.py b/examples/foundational/19a-azure-realtime-beta.py index c7778d6b8..12cb259ba 100644 --- a/examples/foundational/19a-azure-realtime-beta.py +++ b/examples/foundational/19a-azure-realtime-beta.py @@ -14,7 +14,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -25,9 +24,9 @@ from pipecat.services.openai_realtime_beta import ( InputAudioTranscription, SessionProperties, ) -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -66,17 +65,30 @@ weather_function = 
FunctionSchema( tools = ToolsSchema(standard_tools=[weather_function]) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") session_properties = SessionProperties( input_audio_transcription=InputAudioTranscription(model="whisper-1"), @@ -162,13 +174,9 @@ Remember, your responses should be short. Just one or two sentences, usually.""" @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -176,4 +184,4 @@ Remember, your responses should be short. 
Just one or two sentences, usually.""" if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/20a-persistent-context-openai.py b/examples/foundational/20a-persistent-context-openai.py index 32d50326b..285d902f7 100644 --- a/examples/foundational/20a-persistent-context-openai.py +++ b/examples/foundational/20a-persistent-context-openai.py @@ -25,9 +25,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -164,22 +164,33 @@ tools = [ ] -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") global tts - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady @@ -228,13 +239,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -242,4 +249,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/20b-persistent-context-openai-realtime.py b/examples/foundational/20b-persistent-context-openai-realtime.py index 12d82ff37..799108066 100644 --- 
a/examples/foundational/20b-persistent-context-openai-realtime.py +++ b/examples/foundational/20b-persistent-context-openai-realtime.py @@ -15,7 +15,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -30,9 +29,9 @@ from pipecat.services.openai_realtime_beta import ( SessionProperties, TurnDetection, ) -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -153,17 +152,30 @@ tools = [ ] -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -237,13 +249,9 @@ Remember, your responses should be short. Just one or two sentences, usually.""" @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -251,4 +259,4 @@ Remember, your responses should be short. 
Just one or two sentences, usually.""" if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/20c-persistent-context-anthropic.py b/examples/foundational/20c-persistent-context-anthropic.py index 1d304ed6e..ef5ec0ee9 100644 --- a/examples/foundational/20c-persistent-context-anthropic.py +++ b/examples/foundational/20c-persistent-context-anthropic.py @@ -25,9 +25,9 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -159,20 +159,33 @@ tools = [ ] -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") global tts - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -225,13 +238,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -239,4 +248,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/20d-persistent-context-gemini.py b/examples/foundational/20d-persistent-context-gemini.py index a0c9bff7a..131f32420 100644 --- a/examples/foundational/20d-persistent-context-gemini.py +++ b/examples/foundational/20d-persistent-context-gemini.py @@ -12,6 
+12,7 @@ from datetime import datetime from dotenv import load_dotenv from loguru import logger +from run import get_transport_client_id, maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -25,19 +26,16 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -video_participant_id = None - - BASE_FILENAME = "/tmp/pipecat_conversation_" -tts = None -webrtc_peer_id = "" + +# Global variable to store the client ID +client_id = "" async def fetch_weather_from_api(params: FunctionCallParams): @@ -54,11 +52,11 @@ async def fetch_weather_from_api(params: FunctionCallParams): async def get_image(params: FunctionCallParams): question = params.arguments["question"] - logger.debug(f"Requesting image with user_id={webrtc_peer_id}, question={question}") + logger.debug(f"Requesting image with user_id={client_id}, question={question}") # Request the image frame await params.llm.request_image_frame( - user_id=webrtc_peer_id, + user_id=client_id, function_name=params.function_name, tool_call_id=params.tool_call_id, text_content=question, @@ -96,7 +94,6 @@ async def save_conversation(params: FunctionCallParams): async def load_conversation(params: FunctionCallParams): - global tts filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: @@ -221,21 +218,27 @@ tools = [ ] -async def 
run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - global tts, webrtc_peer_id - webrtc_peer_id = webrtc_connection.pc_id +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), + ), +} - logger.info(f"Starting bot with peer_id: {webrtc_peer_id}") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -282,19 +285,21 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + + await maybe_capture_participant_video(transport, client) + + global client_id + client_id = get_transport_client_id(transport, client) + # Kick off the conversation. 
await task.queue_frames([context_aggregator.user().get_context_frame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -302,4 +307,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/20e-persistent-context-aws-nova-sonic.py b/examples/foundational/20e-persistent-context-aws-nova-sonic.py index 1519f1c53..5c848d6f5 100644 --- a/examples/foundational/20e-persistent-context-aws-nova-sonic.py +++ b/examples/foundational/20e-persistent-context-aws-nova-sonic.py @@ -17,16 +17,15 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.aws_nova_sonic.aws import AWSNovaSonicLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -170,17 +169,30 @@ tools = ToolsSchema( ) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Specify initial system instruction. 
# HACK: note that, for now, we need to inject a special bit of text into this instruction to @@ -250,13 +262,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -264,4 +272,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/21-tavus-layer-tavus-transport.py b/examples/foundational/21-tavus-transport.py similarity index 98% rename from examples/foundational/21-tavus-layer-tavus-transport.py rename to examples/foundational/21-tavus-transport.py index c9bcd2501..68614a854 100644 --- a/examples/foundational/21-tavus-layer-tavus-transport.py +++ b/examples/foundational/21-tavus-transport.py @@ -103,7 +103,7 @@ async def main(): logger.info(f"Client disconnected") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner() await runner.run(task) diff --git a/examples/foundational/21a-tavus-layer-small-webrtc.py b/examples/foundational/21a-tavus-video-service.py similarity index 76% rename from examples/foundational/21a-tavus-layer-small-webrtc.py rename to examples/foundational/21a-tavus-video-service.py index 2f557b5cd..59109f119 100644 --- a/examples/foundational/21a-tavus-layer-small-webrtc.py +++ b/examples/foundational/21a-tavus-video-service.py @@ -20,29 +20,39 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.google.llm import GoogleLLMService from pipecat.services.tavus.video import TavusVideoService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=1280, + video_out_height=720, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") async with aiohttp.ClientSession() as session: - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - vad_analyzer=SileroVADAnalyzer(), - video_out_width=1280, - video_out_height=720, - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -108,13 +118,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): 
logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -122,4 +128,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/21b-tavus-layer-daily-transport.py b/examples/foundational/21b-tavus-layer-daily-transport.py deleted file mode 100644 index 564828136..000000000 --- a/examples/foundational/21b-tavus-layer-daily-transport.py +++ /dev/null @@ -1,123 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import asyncio -import os -import sys - -import aiohttp -from daily_runner import configure -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.google.llm import GoogleLLMService -from pipecat.services.tavus.video import TavusVideoService -from pipecat.transports.services.daily import DailyParams, DailyTransport - -load_dotenv(override=True) - -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - - -async def main(): - async with aiohttp.ClientSession() as session: - (room_url, token) = await configure(session) - - transport = DailyTransport( - room_url, - token, - "Pipecat bot", - DailyParams( - audio_in_enabled=True, - 
audio_out_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - vad_analyzer=SileroVADAnalyzer(), - video_out_width=1280, - video_out_height=720, - ), - ) - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - voice_id="a167e0f3-df7e-4d52-a9c3-f949145efdab", - ) - - llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY")) - - tavus = TavusVideoService( - api_key=os.getenv("TAVUS_API_KEY"), - replica_id=os.getenv("TAVUS_REPLICA_ID"), - session=session, - ) - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", - }, - ] - - context = OpenAILLMContext(messages) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, # STT - context_aggregator.user(), # User responses - llm, # LLM - tts, # TTS - tavus, # Tavus output layer - transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant spoken responses - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - audio_in_sample_rate=16000, - audio_out_sample_rate=24000, - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - report_only_initial_ttfb=True, - ), - ) - - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - # Kick off the conversation. 
- messages.append( - { - "role": "system", - "content": "Start by greeting the user and ask how you can help.", - } - ) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - - @transport.event_handler("on_participant_left") - async def on_participant_left(transport, participant, reason): - await task.cancel() - - runner = PipelineRunner(handle_sigint=False) - - await runner.run(task) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/foundational/22-natural-conversation.py b/examples/foundational/22-natural-conversation.py index 75683e41c..6fe26e053 100644 --- a/examples/foundational/22-natural-conversation.py +++ b/examples/foundational/22-natural-conversation.py @@ -25,24 +25,37 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.sync.event_notifier import EventNotifier -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -151,13 +164,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -165,4 +174,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/22b-natural-conversation-proposal.py b/examples/foundational/22b-natural-conversation-proposal.py index 45c25776e..1c3c9ee6e 100644 --- a/examples/foundational/22b-natural-conversation-proposal.py +++ b/examples/foundational/22b-natural-conversation-proposal.py @@ -48,9 +48,9 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from 
pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -202,17 +202,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -376,13 +389,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, 
client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -390,4 +399,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/22c-natural-conversation-mixed-llms.py b/examples/foundational/22c-natural-conversation-mixed-llms.py index 2cbda6b96..391b39329 100644 --- a/examples/foundational/22c-natural-conversation-mixed-llms.py +++ b/examples/foundational/22c-natural-conversation-mixed-llms.py @@ -49,9 +49,9 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -406,17 +406,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -583,13 +596,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -597,4 +606,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/22d-natural-conversation-gemini-audio.py b/examples/foundational/22d-natural-conversation-gemini-audio.py index 3460e079c..f87a776e6 100644 --- a/examples/foundational/22d-natural-conversation-gemini-audio.py +++ b/examples/foundational/22d-natural-conversation-gemini-audio.py @@ -48,9 +48,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.google.llm import GoogleLLMContext, 
GoogleLLMService from pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -627,17 +627,30 @@ class OutputGate(FrameProcessor): break -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), @@ -762,13 +775,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -776,4 +785,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/23-bot-background-sound-daily.py b/examples/foundational/23-bot-background-sound-daily.py deleted file mode 100644 index 67c03386f..000000000 --- a/examples/foundational/23-bot-background-sound-daily.py +++ /dev/null @@ -1,119 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import argparse -import asyncio -import os -import sys - -import aiohttp -from daily_runner import configure_with_args -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import MixerEnableFrame, MixerUpdateSettingsFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.services.daily import DailyParams, DailyTransport - -load_dotenv(override=True) - -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - - -async def main(): - async with aiohttp.ClientSession() as session: - parser = argparse.ArgumentParser(description="Bot Background Sound") - parser.add_argument("-i", "--input", type=str, required=True, help="Input audio file") 
- - (room_url, token, args) = await configure_with_args(session, parser) - - soundfile_mixer = SoundfileMixer( - sound_files={"office": args.input}, - default_sound="office", - volume=2.0, - ) - - transport = DailyTransport( - room_url, - token, - "Respond bot", - DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - audio_out_mixer=soundfile_mixer, - transcription_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ) - - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", - }, - ] - - context = OpenAILLMContext(messages) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - context_aggregator.user(), # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant spoken responses - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - report_only_initial_ttfb=True, - ), - ) - - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - await transport.capture_participant_transcription(participant["id"]) - # Show how to use mixer control frames. 
- await asyncio.sleep(10.0) - await task.queue_frame(MixerUpdateSettingsFrame({"volume": 0.5})) - await asyncio.sleep(5.0) - await task.queue_frame(MixerEnableFrame(False)) - await asyncio.sleep(5.0) - await task.queue_frame(MixerEnableFrame(True)) - await asyncio.sleep(5.0) - # Kick off the conversation. - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - - runner = PipelineRunner() - - await runner.run(task) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/foundational/23-bot-background-sound-p2p.py b/examples/foundational/23-bot-background-sound.py similarity index 60% rename from examples/foundational/23-bot-background-sound-p2p.py rename to examples/foundational/23-bot-background-sound.py index 1d75c305e..e72ea0614 100644 --- a/examples/foundational/23-bot-background-sound-p2p.py +++ b/examples/foundational/23-bot-background-sound.py @@ -4,16 +4,6 @@ # SPDX-License-Identifier: BSD 2-Clause License # -"""Usage ------ -Set the path to your background audio file using the `INPUT_AUDIO_PATH` environment variable, then run the bot using: - - INPUT_AUDIO_PATH=path/to/your_audio.mp3 python 23-bot-background-sound.py - -Example: - INPUT_AUDIO_PATH=my_audio.mp3 python 23-bot-background-sound.py -""" - import argparse import asyncio import os @@ -31,36 +21,54 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -audio_path = os.getenv("INPUT_AUDIO_PATH") -if not audio_path: - raise ValueError("No INPUT_AUDIO_PATH specified in environment variables") +OFFICE_SOUND_FILE = os.path.join( + os.path.dirname(__file__), "assets", "office-ambience-24000-mono.mp3" +) - -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") - - soundfile_mixer = SoundfileMixer( - sound_files={"office": audio_path}, - default_sound="office", - volume=2.0, - ) - - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - audio_out_mixer=soundfile_mixer, - vad_analyzer=SileroVADAnalyzer(), +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + audio_out_mixer=SoundfileMixer( + sound_files={"office": OFFICE_SOUND_FILE}, + default_sound="office", + volume=2.0, ), - ) + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + audio_out_mixer=SoundfileMixer( + sound_files={"office": OFFICE_SOUND_FILE}, + default_sound="office", + volume=2.0, + ), + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + audio_out_mixer=SoundfileMixer( + sound_files={"office": OFFICE_SOUND_FILE}, + default_sound="office", + volume=2.0, + ), + vad_analyzer=SileroVADAnalyzer(), + ), +} + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -83,7 +91,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac pipeline = Pipeline( [ transport.input(), # Transport user input - stt, # STT service + stt, # STT context_aggregator.user(), # User responses llm, # LLM tts, # TTS @@ -103,16 +111,18 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac ) @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected: {client}") + async def on_client_connected(transport, participant): # Show how to use mixer control frames. 
- await asyncio.sleep(10.0) + logger.info(f"Listening for background sound for a bit...") + await asyncio.sleep(5.0) + logger.info(f"Reducing volume...") await task.queue_frame(MixerUpdateSettingsFrame({"volume": 0.5})) await asyncio.sleep(5.0) + logger.info(f"Disabling background sound for a bit...") await task.queue_frame(MixerEnableFrame(False)) await asyncio.sleep(5.0) + logger.info(f"Re-enabling background sound and starting bot...") await task.queue_frame(MixerEnableFrame(True)) - await asyncio.sleep(5.0) # Kick off the conversation. messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([context_aggregator.user().get_context_frame()]) @@ -120,13 +130,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -134,4 +140,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/24-stt-mute-filter.py b/examples/foundational/24-stt-mute-filter.py index 83f7b85a9..981e91957 100644 --- a/examples/foundational/24-stt-mute-filter.py +++ b/examples/foundational/24-stt-mute-filter.py @@ -23,9 +23,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import 
TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -38,17 +38,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -125,13 +138,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -139,4 +148,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/25-google-audio-in.py b/examples/foundational/25-google-audio-in.py index eea8c5a2a..f97e91832 100644 --- a/examples/foundational/25-google-audio-in.py +++ b/examples/foundational/25-google-audio-in.py @@ -34,9 +34,9 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.google.llm import GoogleLLMContext, GoogleLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -269,17 +269,30 @@ class TranscriptionContextFixup(FrameProcessor): await self.push_frame(frame, direction) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), @@ -359,13 +372,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -373,4 +382,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26-gemini-multimodal-live.py b/examples/foundational/26-gemini-multimodal-live.py index 3e0bccb74..13036fb43 100644 --- a/examples/foundational/26-gemini-multimodal-live.py +++ b/examples/foundational/26-gemini-multimodal-live.py @@ -17,29 +17,44 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from 
pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams # Load environment variables load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. 
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_in_enabled=False, - # set stop_secs to something roughly similar to the internal setting - # of the Multimodal Live api, just to align events. - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create the Gemini Multimodal Live LLM service system_instruction = f""" @@ -96,18 +111,14 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() # Run the pipeline - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26a-gemini-multimodal-live-transcription.py b/examples/foundational/26a-gemini-multimodal-live-transcription.py index 94fdfac2a..6765dbab5 100644 --- a/examples/foundational/26a-gemini-multimodal-live-transcription.py +++ b/examples/foundational/26a-gemini-multimodal-live-transcription.py @@ -19,29 +19,49 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.transcript_processor import TranscriptProcessor from pipecat.services.gemini_multimodal_live.gemini import 
GeminiMultimodalLiveLLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. 
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - # set stop_secs to something roughly similar to the internal setting - # of the Multimodal Live api, just to align events. This doesn't really - # matter because we can only use the Multimodal Live API's phrase - # endpointing, for now. - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") llm = GeminiMultimodalLiveLLMService( api_key=os.getenv("GOOGLE_API_KEY"), @@ -102,10 +122,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() # Register event handler for transcript updates @@ -117,7 +133,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac line = f"{timestamp}{msg.role}: {msg.content}" logger.info(f"Transcript: {line}") - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +141,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26b-gemini-multimodal-live-function-calling.py b/examples/foundational/26b-gemini-multimodal-live-function-calling.py index 29017ba00..7087f5766 100644 --- 
a/examples/foundational/26b-gemini-multimodal-live-function-calling.py +++ b/examples/foundational/26b-gemini-multimodal-live-function-calling.py @@ -21,9 +21,9 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -48,22 +48,42 @@ for the weather, call this function. """ -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. 
This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - # set stop_secs to something roughly similar to the internal setting - # of the Multimodal Live api, just to align events. This doesn't really - # matter because we can only use the Multimodal Live API's phrase - # endpointing, for now. 
- vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") weather_function = FunctionSchema( name="get_current_weather", @@ -127,13 +147,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -141,4 +157,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26c-gemini-multimodal-live-video.py b/examples/foundational/26c-gemini-multimodal-live-video.py index cabbd8353..511ae3346 100644 --- a/examples/foundational/26c-gemini-multimodal-live-video.py +++ b/examples/foundational/26c-gemini-multimodal-live-video.py @@ -4,14 +4,13 @@ # SPDX-License-Identifier: BSD 2-Clause License # +import argparse import asyncio import os -import sys -import aiohttp -from daily_runner import configure from dotenv import load_dotenv from loguru import logger +from run import maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -20,89 +19,98 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService -from 
pipecat.transports.services.daily import DailyParams, DailyTransport +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_in_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} -async def main(): - async with aiohttp.ClientSession() as session: - (room_url, token) = await configure(session) +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + llm = GeminiMultimodalLiveLLMService( + api_key=os.getenv("GOOGLE_API_KEY"), + voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede + # system_instruction="Talk like a pirate." 
+ # inference_on_context_initialization=False, + ) - transport = DailyTransport( - room_url, - token, - "Respond bot", - DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - # set stop_secs to something roughly similar to the internal setting - # of the Multimodal Live api, just to align events. This doesn't really - # matter because we can only use the Multimodal Live API's phrase - # endpointing, for now. - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) + context = OpenAILLMContext( + [ + { + "role": "user", + "content": "Say hello.", + }, + ], + ) + context_aggregator = llm.create_context_aggregator(context) - llm = GeminiMultimodalLiveLLMService( - api_key=os.getenv("GOOGLE_API_KEY"), - voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede - # system_instruction="Talk like a pirate." - # inference_on_context_initialization=False, - ) + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + transport.output(), + context_aggregator.assistant(), + ] + ) - context = OpenAILLMContext( - [ - { - "role": "user", - "content": "Say hello.", - }, - ], - ) - context_aggregator = llm.create_context_aggregator(context) + task = PipelineTask( + pipeline, + params=PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + ), + ) - pipeline = Pipeline( - [ - transport.input(), - context_aggregator.user(), - llm, - transport.output(), - context_aggregator.assistant(), - ] - ) + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected: {client}") - task = PipelineTask( - pipeline, - params=PipelineParams( - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - ), - ) + await maybe_capture_participant_video(transport, client) - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - # Enable both camera and 
screenshare. From the client side - # send just one. - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="camera" - ) - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="screenVideo" - ) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - await asyncio.sleep(3) - logger.debug("Unpausing audio and video") - llm.set_audio_input_paused(False) - llm.set_video_input_paused(False) + await task.queue_frames([context_aggregator.user().get_context_frame()]) + await asyncio.sleep(3) + logger.debug("Unpausing audio and video") + llm.set_audio_input_paused(False) + llm.set_video_input_paused(False) - runner = PipelineRunner() + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() - await runner.run(task) + runner = PipelineRunner(handle_sigint=handle_sigint) + + await runner.run(task) if __name__ == "__main__": - asyncio.run(main()) + from run import main + + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26d-gemini-multimodal-live-text.py b/examples/foundational/26d-gemini-multimodal-live-text.py index c42acd34b..da388deda 100644 --- a/examples/foundational/26d-gemini-multimodal-live-text.py +++ b/examples/foundational/26d-gemini-multimodal-live-text.py @@ -22,9 +22,9 @@ from pipecat.services.gemini_multimodal_live.gemini import ( GeminiMultimodalModalities, InputParams, ) -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams 
load_dotenv(override=True) @@ -40,22 +40,42 @@ Respond to what the user said in a creative and helpful way. Keep your responses """ -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - # set stop_secs to something roughly similar to the internal setting - # of the Multimodal Live api, just to align events. 
This doesn't really - # matter because we can only use the Multimodal Live API's phrase - # endpointing, for now. - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") llm = GeminiMultimodalLiveLLMService( api_key=os.getenv("GOOGLE_API_KEY"), @@ -114,13 +134,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -128,4 +144,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/26e-gemini-multimodal-google-search.py b/examples/foundational/26e-gemini-multimodal-google-search.py index 97483028c..5fc1d8d1e 100644 --- a/examples/foundational/26e-gemini-multimodal-google-search.py +++ b/examples/foundational/26e-gemini-multimodal-google-search.py @@ -11,14 +11,15 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService -from pipecat.transports.base_transport 
import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -41,18 +42,42 @@ Start each interaction by asking the user about which place they would like to k """ -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now.
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Initialize the Gemini Multimodal Live model llm = GeminiMultimodalLiveLLMService( @@ -93,13 +118,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -107,4 +128,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/27-simli-layer.py b/examples/foundational/27-simli-layer.py index 38721e50e..1b3faba39 100644 --- a/examples/foundational/27-simli-layer.py +++ b/examples/foundational/27-simli-layer.py @@ -20,29 +20,39 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.simli.video import SimliVideoService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from 
pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=512, + video_out_height=512, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_is_live=True, + video_out_width=512, + video_out_height=512, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_out_enabled=True, - video_out_is_live=True, - video_out_width=512, - video_out_height=512, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -96,17 +106,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, 
transport_params=transport_params) diff --git a/examples/foundational/28-transcription-processor.py b/examples/foundational/28-transcription-processor.py index e538c20e9..2cb0f3122 100644 --- a/examples/foundational/28-transcription-processor.py +++ b/examples/foundational/28-transcription-processor.py @@ -21,9 +21,9 @@ from pipecat.processors.transcript_processor import TranscriptProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -89,17 +89,30 @@ class TranscriptHandler: await self.save_message(msg) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -155,17 +168,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index 08c39fe55..267b95f2f 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -19,25 +19,37 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from 
pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -104,13 +116,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = 
PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -118,4 +126,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/30-observer.py b/examples/foundational/30-observer.py index c9cd08aee..ad0a8cace 100644 --- a/examples/foundational/30-observer.py +++ b/examples/foundational/30-observer.py @@ -34,9 +34,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_input import BaseInputTransport from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -74,17 +74,30 @@ class CustomObserver(BaseObserver): logger.info(f"🤖 BOT STOP SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -148,13 +161,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -162,4 +171,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/32-gemini-grounding-metadata.py b/examples/foundational/32-gemini-grounding-metadata.py index 8c53f7367..3dba663d9 100644 --- a/examples/foundational/32-gemini-grounding-metadata.py +++ b/examples/foundational/32-gemini-grounding-metadata.py @@ -22,9 +22,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import 
GoogleLLMService, LLMSearchResponseFrame from pipecat.services.llm_service import LLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams sys.path.append(str(Path(__file__).parent.parent)) @@ -67,17 +67,30 @@ class LLMSearchLoggerObserver(BaseObserver): logger.debug(f"🧠 {arrow} {dst} LLM SEARCH RESPONSE FRAME: {frame} at {time_sec:.2f}s") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -130,17 +143,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/33-gemini-rag.py b/examples/foundational/33-gemini-rag.py index 43da2ccf1..9653e5f2f 100644 --- a/examples/foundational/33-gemini-rag.py +++ b/examples/foundational/33-gemini-rag.py @@ -65,9 +65,9 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from 
pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -154,17 +154,30 @@ async def query_knowledge_base(params: FunctionCallParams): await params.result_callback(response.text) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -241,17 +254,13 @@ Your response will be turned into speech so use only simple words and punctuatio @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() 
- runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/34-audio-recording.py b/examples/foundational/34-audio-recording.py index a7d07e7b2..7922c0b7e 100644 --- a/examples/foundational/34-audio-recording.py +++ b/examples/foundational/34-audio-recording.py @@ -65,9 +65,9 @@ from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -86,17 +86,30 @@ async def save_audio_file(audio: bytes, filename: str, sample_rate: int, num_cha logger.info(f"Audio saved to {filename}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"), audio_passthrough=True) @@ -146,10 +159,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() # Handler for merged audio @@ -174,11 +183,11 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac bot_filename = f"recordings/bot_{timestamp}.wav" await save_audio_file(bot_audio, bot_filename, sample_rate, 1) - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/35-pattern-pair-voice-switching.py b/examples/foundational/35-pattern-pair-voice-switching.py index 7b1218015..b753967a9 100644 --- a/examples/foundational/35-pattern-pair-voice-switching.py +++ b/examples/foundational/35-pattern-pair-voice-switching.py @@ 
-59,9 +59,9 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams from pipecat.utils.text.pattern_pair_aggregator import PatternMatch, PatternPairAggregator load_dotenv(override=True) @@ -74,19 +74,31 @@ VOICE_IDS = { "male": "7cf0e2b1-8daf-4fe4-89ad-f6039398f359", # Male character voice } +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - # Create pattern pair aggregator for voice switching pattern_aggregator = PatternPairAggregator() @@ -215,17 +227,13 @@ Remember: Use narrator voice for EVERYTHING except the actual quoted dialogue."" @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/36-user-email-gathering.py b/examples/foundational/36-user-email-gathering.py index a917a626a..58be17224 100644 --- a/examples/foundational/36-user-email-gathering.py +++ b/examples/foundational/36-user-email-gathering.py @@ -21,9 +21,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import 
OpenAILLMService from pipecat.services.rime.tts import RimeHttpTTSService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -32,17 +32,30 @@ async def store_user_emails(params: FunctionCallParams): print(f"User emails: {params.arguments}") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -132,17 +145,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - 
@transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/37-mem0.py b/examples/foundational/37-mem0.py index dfb3e7db4..35d410297 100644 --- a/examples/foundational/37-mem0.py +++ b/examples/foundational/37-mem0.py @@ -58,9 +58,9 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.mem0.memory import Mem0MemoryService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -123,7 +123,29 @@ async def get_initial_greeting( return "Hello! How can I help you today?" -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): """Main bot execution function. Sets up and runs the bot pipeline including: @@ -138,15 +160,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) # Initialize text-to-speech service @@ -272,17 +285,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/38-smart-turn-fal.py b/examples/foundational/38-smart-turn-fal.py index b4a0708cb..12d971232 100644 --- a/examples/foundational/38-smart-turn-fal.py +++ b/examples/foundational/38-smart-turn-fal.py @@ -21,92 +21,109 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from 
pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +aiohttp_session = aiohttp.ClientSession() -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=FalSmartTurnAnalyzer( + api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session + ), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=FalSmartTurnAnalyzer( + api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session + ), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=FalSmartTurnAnalyzer( + api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session + ), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - async with aiohttp.ClientSession() as session: - transport = 
SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=FalSmartTurnAnalyzer( - api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=session - ), - ), - ) + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ) + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", + }, + ] - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. 
Respond to what the user said in a creative and helpful way.", - }, + context = OpenAILLMContext(messages) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + context_aggregator.user(), # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + context_aggregator.assistant(), # Assistant spoken responses ] + ) - context = OpenAILLMContext(messages) - context_aggregator = llm.create_context_aggregator(context) + task = PipelineTask( + pipeline, + params=PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + report_only_initial_ttfb=True, + ), + ) - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, - context_aggregator.user(), # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant spoken responses - ] - ) + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + # Kick off the conversation. + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([context_aggregator.user().get_context_frame()]) - task = PipelineTask( - pipeline, - params=PipelineParams( - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - report_only_initial_ttfb=True, - ), - ) + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. 
- messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([context_aggregator.user().get_context_frame()]) + runner = PipelineRunner(handle_sigint=handle_sigint) - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") + await runner.run(task) - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") - await task.cancel() - - runner = PipelineRunner(handle_sigint=False) - - await runner.run(task) + await aiohttp_session.close() if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/38a-smart-turn-local-coreml.py b/examples/foundational/38a-smart-turn-local-coreml.py index 4168e8bbc..87e193674 100644 --- a/examples/foundational/38a-smart-turn-local-coreml.py +++ b/examples/foundational/38a-smart-turn-local-coreml.py @@ -21,44 +21,62 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# To use this locally, set the environment variable LOCAL_SMART_TURN_MODEL_PATH +# to the path where the smart-turn repo is cloned. 
+# +# Example setup: +# +# # Git LFS (Large File Storage) +# brew install git-lfs +# # Hugging Face uses LFS to store large model files, including .mlpackage +# git lfs install +# # Clone the repo with the smart_turn_classifier.mlpackage +# git clone https://huggingface.co/pipecat-ai/smart-turn +# +# Then set the env variable: +# export LOCAL_SMART_TURN_MODEL_PATH=./smart-turn +# or add it to your .env file +smart_turn_model_path = os.getenv("LOCAL_SMART_TURN_MODEL_PATH") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") - - # To use this locally, set the environment variable LOCAL_SMART_TURN_MODEL_PATH - # to the path where the smart-turn repo is cloned. - # - # Example setup: - # - # # Git LFS (Large File Storage) - # brew install git-lfs - # # Hugging Face uses LFS to store large model files, including .mlpackage - # git lfs install - # # Clone the repo with the smart_turn_classifier.mlpackage - # git clone https://huggingface.co/pipecat-ai/smart-turn - # - # Then set the env variable: - # export LOCAL_SMART_TURN_MODEL_PATH=./smart-turn - # or add it to your .env file - smart_turn_model_path = os.getenv("LOCAL_SMART_TURN_MODEL_PATH") - - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalCoreMLSmartTurnAnalyzer( - smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() - ), +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalCoreMLSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() ), - ) + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalCoreMLSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() + ), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalCoreMLSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() + ), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -111,13 +129,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +139,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/38b-smart-turn-local.py b/examples/foundational/38b-smart-turn-local.py index e95d06eac..850f5d640 100644 --- 
a/examples/foundational/38b-smart-turn-local.py +++ b/examples/foundational/38b-smart-turn-local.py @@ -21,44 +21,62 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# To use this locally, set the environment variable LOCAL_SMART_TURN_MODEL_PATH +# to the path where the smart-turn repo is cloned. +# +# Example setup: +# +# # Git LFS (Large File Storage) +# brew install git-lfs +# # Hugging Face uses LFS to store large model files, including .mlpackage +# git lfs install +# # Clone the repo with the smart_turn_classifier.mlpackage +# git clone https://huggingface.co/pipecat-ai/smart-turn +# +# Then set the env variable: +# export LOCAL_SMART_TURN_MODEL_PATH=./smart-turn +# or add it to your .env file +smart_turn_model_path = os.getenv("LOCAL_SMART_TURN_MODEL_PATH") -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") - - # To use this locally, set the environment variable LOCAL_SMART_TURN_MODEL_PATH - # to the path where the smart-turn repo is cloned. 
- # - # Example setup: - # - # # Git LFS (Large File Storage) - # brew install git-lfs - # # Hugging Face uses LFS to store large model files, including .mlpackage - # git lfs install - # # Clone the repo with the smart_turn_classifier.mlpackage - # git clone https://huggingface.co/pipecat-ai/smart-turn - # - # Then set the env variable: - # export LOCAL_SMART_TURN_MODEL_PATH=./smart-turn - # or add it to your .env file - smart_turn_model_path = os.getenv("LOCAL_SMART_TURN_MODEL_PATH") - - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzer( - smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() - ), +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() ), - ) + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() + ), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + turn_analyzer=LocalSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() + ), + ), +} + + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -111,13 +129,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -125,4 +139,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/39-mcp-stdio.py b/examples/foundational/39-mcp-stdio.py index 1c23931d2..f71a4f0b4 100644 --- a/examples/foundational/39-mcp-stdio.py +++ b/examples/foundational/39-mcp-stdio.py @@ -33,9 +33,8 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -81,20 +80,31 @@ class UrlToImageProcessor(FrameProcessor): logger.error(error_msg) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. 
The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session for API calls async with aiohttp.ClientSession() as session: @@ -127,15 +137,15 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac tools = await mcp.register_tools(llm) system = f""" - You are a helpful LLM in a WebRTC call. - Your goal is to demonstrate your capabilities in a succinct way. + You are a helpful LLM in a WebRTC call. + Your goal is to demonstrate your capabilities in a succinct way. You have access to a number of tools provided by NASA MCP. Use any and all tools to help users. When asked for the astronomy picture of the day, PASS in NO date to the API. This ensures we get the latest picture available. If as specific date is asked for, you can pass in that date to the API. - Your output will be converted to audio so don't include special characters in your answers. - Respond to what the user said in a creative and helpful way. - Don't overexplain what you are doing. + Your output will be converted to audio so don't include special characters in your answers. 
+ Respond to what the user said in a creative and helpful way. + Don't overexplain what you are doing. Just respond with short sentences when you are carrying out tool calls. """ @@ -174,13 +184,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -188,4 +194,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/39a-mcp-run-sse.py b/examples/foundational/39a-mcp-run-sse.py index a567f4d10..cae94a037 100644 --- a/examples/foundational/39a-mcp-run-sse.py +++ b/examples/foundational/39a-mcp-run-sse.py @@ -6,9 +6,7 @@ import argparse import os -import sys -import aiohttp from dotenv import load_dotenv from loguru import logger @@ -17,30 +15,41 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from 
pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): logger.info(f"Starting bot") - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( @@ -62,13 +71,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac tools = await mcp.register_tools(llm) system = f""" - You are a helpful LLM in a WebRTC call. - Your goal is to demonstrate your capabilities in a succinct way. + You are a helpful LLM in a WebRTC call. + Your goal is to demonstrate your capabilities in a succinct way. You have access to a number of tools provided by mcp.run. Use any and all tools to help users. - Your output will be converted to audio so don't include special characters in your answers. 
- Respond to what the user said in a creative and helpful way. + Your output will be converted to audio so don't include special characters in your answers. + Respond to what the user said in a creative and helpful way. When asked for today's date, use 'https://www.datetoday.net/'. - Don't overexplain what you are doing. + Don't overexplain what you are doing. Just respond with short sentences when you are carrying out tool calls. """ @@ -106,13 +115,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -120,4 +125,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/39b-multiple-mcp.py b/examples/foundational/39b-multiple-mcp.py index 2ce0dc201..fc2e53122 100644 --- a/examples/foundational/39b-multiple-mcp.py +++ b/examples/foundational/39b-multiple-mcp.py @@ -10,7 +10,6 @@ import io import os import re import shutil -import sys import aiohttp from dotenv import load_dotenv @@ -34,9 +33,8 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection 
+from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) @@ -82,20 +80,31 @@ class UrlToImageProcessor(FrameProcessor): logger.error(error_msg) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + video_out_enabled=True, + video_out_width=1024, + video_out_height=1024, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - video_out_enabled=True, - video_out_width=1024, - video_out_height=1024, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Create an HTTP session for API calls async with aiohttp.ClientSession() as session: @@ -111,13 +120,13 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac ) system = f""" - You are a helpful LLM in a WebRTC call. - Your goal is to demonstrate your capabilities in a succinct way. + You are a helpful LLM in a WebRTC call. + Your goal is to demonstrate your capabilities in a succinct way. You have access to a number of tools provided by NASA MCP. Use any and all tools to help users. When asked for today's date, use 'https://www.datetoday.net/'. 
When asked for the astronomy picture of the day, use 'https://www.datetoday.net/', to get today's date. - Your output will be converted to audio so don't include special characters in your answers. - Respond to what the user said in a creative and helpful way. + Your output will be converted to audio so don't include special characters in your answers. + Respond to what the user said in a creative and helpful way. Don't overexplain what you are doing. Just respond with short sentences when you are carrying out tool calls. """ @@ -185,13 +194,9 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) @@ -199,4 +204,4 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index 4ed533e18..7f57a24d8 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -14,16 +14,15 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from 
pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.aws_nova_sonic import AWSNovaSonicLLMService from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams +from pipecat.transports.services.daily import DailyParams # Load environment variables load_dotenv(override=True) @@ -62,20 +61,30 @@ weather_function = FunctionSchema( tools = ToolsSchema(standard_tools=[weather_function]) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - # Initialize the SmallWebRTCTransport with the connection - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_in_sample_rate=16000, - audio_out_enabled=True, - camera_in_enabled=False, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") # Specify initial system instruction. 
# HACK: note that, for now, we need to inject a special bit of text into this instruction to @@ -156,18 +165,14 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() # Run the pipeline - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": from run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/foundational/README.md b/examples/foundational/README.md index 3c77a7bef..16e3ce42f 100644 --- a/examples/foundational/README.md +++ b/examples/foundational/README.md @@ -32,11 +32,42 @@ Depending on what you're trying to build, these learning paths will guide you th 4. Run any example: ```bash - python run.py 01-say-one-thing.py + python 01-say-one-thing.py ``` 5. Open the web interface at http://localhost:7860 and click "Connect" +## Running examples with other transports + +It is possible to run most of the examples with other transports such as Twilio or Daily. + +### Daily + +You need to create a Daily account at https://dashboard.daily.co/u/signup. Once signed up, you can create your own room from the dashboard and set the environment variables `DAILY_SAMPLE_ROOM_URL` and `DAILY_API_KEY`. Alternatively, you can let the example create a room for you (it still needs the `DAILY_API_KEY` environment variable). Then, start any example with `-t daily`: + +```bash +python 07-interruptible.py -t daily +``` + +### Twilio + +It is also possible to run the example through a Twilio phone number. You will +need to set up a few things: + +1. Install and run [ngrok](https://ngrok.com/download). 
+ + ```bash + ngrok http 7860 + ``` + +2. Configure your Twilio phone number. One way is to set up a TwiML app and set the request URL to the ngrok URL from step (1). Then, set your phone number to use the new TwiML app. + +Then, run the example with: + +```bash +python 07-interruptible.py -t twilio -x NGROK_HOST_NAME (no protocol) +``` + ## Examples by Feature ### Basics @@ -109,7 +140,7 @@ Depending on what you're trying to build, these learning paths will guide you th ### Customizing Network Settings ```bash -python run.py --host 0.0.0.0 --port 8080 +python 01-say-one-thing.py --host 0.0.0.0 --port 8080 ``` ### Troubleshooting diff --git a/examples/foundational/assets/office-ambience-24000-mono.mp3 b/examples/foundational/assets/office-ambience-24000-mono.mp3 new file mode 100644 index 000000000..60a452694 Binary files /dev/null and b/examples/foundational/assets/office-ambience-24000-mono.mp3 differ diff --git a/examples/foundational/run.py b/examples/foundational/run.py index e7012c9e9..65a4e94b3 100644 --- a/examples/foundational/run.py +++ b/examples/foundational/run.py @@ -6,200 +6,248 @@ import argparse import asyncio -import importlib.util +import json import os import sys from contextlib import asynccontextmanager -from inspect import iscoroutinefunction, signature -from typing import Any, Callable, Dict, Optional, Tuple +from typing import Any, Callable, Dict, Mapping, Optional +import aiohttp import uvicorn from dotenv import load_dotenv -from fastapi import BackgroundTasks, FastAPI -from fastapi.responses import RedirectResponse +from fastapi import BackgroundTasks, FastAPI, WebSocket +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import HTMLResponse, RedirectResponse from loguru import logger -from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI +from pipecat.serializers.twilio import TwilioFrameSerializer +from pipecat.transports.base_transport import BaseTransport, TransportParams +from
pipecat.transports.network.fastapi_websocket import ( + FastAPIWebsocketParams, + FastAPIWebsocketTransport, +) +from pipecat.transports.network.small_webrtc import SmallWebRTCTransport from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection +from pipecat.transports.services.daily import DailyParams, DailyTransport # Load environment variables load_dotenv(override=True) -app = FastAPI() -# Store connections by pc_id -pcs_map: Dict[str, SmallWebRTCConnection] = {} - -ice_servers = [ - IceServer( - urls="stun:stun.l.google.com:19302", - ) -] - -# Mount the frontend at / -app.mount("/client", SmallWebRTCPrebuiltUI) - -# Store program arguments -args: argparse.Namespace = argparse.Namespace() - -# Store the bot module and function info -bot_module: Any = None -run_bot_func: Optional[Callable] = None -is_webrtc_bot: bool = True +def get_transport_client_id(transport: BaseTransport, client: Any) -> str: + if isinstance(transport, SmallWebRTCTransport): + return client.pc_id + elif isinstance(transport, DailyTransport): + return client["id"] + logger.warning(f"Unable to get client id from unsupported transport {type(transport)}") + return "" -def import_bot_file(file_path: str) -> Tuple[Any, Callable, bool]: - """Dynamically import the bot file and determine how to run it. 
- - Returns: - tuple: (module, run_function, is_webrtc_bot) - - module: The imported module - - run_function: Either run_bot or main function - - is_webrtc_bot: True if run_bot function exists and accepts a WebRTC connection - """ - if not os.path.exists(file_path): - raise FileNotFoundError(f"Bot file not found: {file_path}") - - # Extract module name without extension - module_name = os.path.splitext(os.path.basename(file_path))[0] - - # Load the module - spec = importlib.util.spec_from_file_location(module_name, file_path) - if not spec or not spec.loader: - raise ImportError(f"Could not load spec for {file_path}") - - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - spec.loader.exec_module(module) - - # Check for run_bot function first - if hasattr(module, "run_bot"): - run_func = module.run_bot - # Check if the function accepts a WebRTC connection - sig = signature(run_func) - is_webrtc = len(sig.parameters) > 0 - return module, run_func, is_webrtc - - # Fall back to main function - if hasattr(module, "main") and iscoroutinefunction(module.main): - return module, module.main, False - - raise AttributeError(f"No run_bot or async main function found in {file_path}") - - -@app.get("/", include_in_schema=False) -async def root_redirect(): - return RedirectResponse(url="/client/") - - -@app.post("/api/offer") -async def offer(request: dict, background_tasks: BackgroundTasks): - global run_bot_func, is_webrtc_bot - - if not run_bot_func: - raise RuntimeError("No bot file has been loaded") - - if not is_webrtc_bot: - return { - "error": "This bot doesn't support WebRTC connections, it's running in standalone mode" - } - - pc_id = request.get("pc_id") - - if pc_id and pc_id in pcs_map: - pipecat_connection = pcs_map[pc_id] - logger.info(f"Reusing existing connection for pc_id: {pc_id}") - await pipecat_connection.renegotiate( - sdp=request["sdp"], type=request["type"], restart_pc=request.get("restart_pc", False) +async def 
maybe_capture_participant_video(transport: BaseTransport, client: Any): + if isinstance(transport, DailyTransport): + await transport.capture_participant_video(client["id"], framerate=0, video_source="camera") + await transport.capture_participant_video( + client["id"], framerate=0, video_source="screenVideo" ) - else: - pipecat_connection = SmallWebRTCConnection(ice_servers) - await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"]) - - @pipecat_connection.event_handler("closed") - async def handle_disconnected(webrtc_connection: SmallWebRTCConnection): - logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}") - pcs_map.pop(webrtc_connection.pc_id, None) - - # We've already checked that run_bot_func exists - assert run_bot_func is not None - background_tasks.add_task(run_bot_func, pipecat_connection, args) - - answer = pipecat_connection.get_answer() - # Updating the peer connection inside the map - pcs_map[answer["pc_id"]] = pipecat_connection - - return answer -@asynccontextmanager -async def lifespan(app: FastAPI): - yield # Run app - coros = [pc.close() for pc in pcs_map.values()] - await asyncio.gather(*coros) - pcs_map.clear() +def run_example_daily( + run_example: Callable, + args: argparse.Namespace, + params: DailyParams, +): + logger.info("Running example with DailyTransport...") + + from daily_runner import configure + + async def run(): + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + # Run example function with DailyTransport transport arguments. 
+ transport = DailyTransport(room_url, token, "Pipecat", params=params) + await run_example(transport, args, True) + + asyncio.run(run()) -async def run_standalone_bot() -> None: - """Run a standalone bot that doesn't require WebRTC""" - global run_bot_func - if run_bot_func is not None: - await run_bot_func() - else: - raise RuntimeError("No bot function available to run") +def run_example_webrtc( + run_example: Callable, + args: argparse.Namespace, + params: TransportParams, +): + logger.info("Running example with SmallWebRTCTransport...") + + from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI + + app = FastAPI() + + # Store connections by pc_id + pcs_map: Dict[str, SmallWebRTCConnection] = {} + + ice_servers = [ + IceServer( + urls="stun:stun.l.google.com:19302", + ) + ] + + # Mount the frontend at / + app.mount("/client", SmallWebRTCPrebuiltUI) + + @app.get("/", include_in_schema=False) + async def root_redirect(): + return RedirectResponse(url="/client/") + + @app.post("/api/offer") + async def offer(request: dict, background_tasks: BackgroundTasks): + pc_id = request.get("pc_id") + + if pc_id and pc_id in pcs_map: + pipecat_connection = pcs_map[pc_id] + logger.info(f"Reusing existing connection for pc_id: {pc_id}") + await pipecat_connection.renegotiate( + sdp=request["sdp"], + type=request["type"], + restart_pc=request.get("restart_pc", False), + ) + else: + pipecat_connection = SmallWebRTCConnection(ice_servers) + await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"]) + + @pipecat_connection.event_handler("closed") + async def handle_disconnected(webrtc_connection: SmallWebRTCConnection): + logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}") + pcs_map.pop(webrtc_connection.pc_id, None) + + # Run example function with SmallWebRTC transport arguments. 
+ transport = SmallWebRTCTransport(params=params, webrtc_connection=pipecat_connection) + background_tasks.add_task(run_example, transport, args, False) + + answer = pipecat_connection.get_answer() + # Updating the peer connection inside the map + pcs_map[answer["pc_id"]] = pipecat_connection + + return answer + + @asynccontextmanager + async def lifespan(app: FastAPI): + yield # Run app + coros = [pc.close() for pc in pcs_map.values()] + await asyncio.gather(*coros) + pcs_map.clear() + + uvicorn.run(app, host=args.host, port=args.port) -def main(parser: Optional[argparse.ArgumentParser] = None): - global args +def run_example_twilio( + run_example: Callable, + args: argparse.Namespace, + params: FastAPIWebsocketParams, +): + logger.info("Running example with FastAPIWebsocketTransport (Twilio)...") + app = FastAPI() + + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Allow all origins for testing + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + @app.post("/") + async def start_call(): + logger.debug("POST TwiML") + + xml_content = f""" + + + + + + + """ + return HTMLResponse(content=xml_content, media_type="application/xml") + + @app.websocket("/ws") + async def websocket_endpoint(websocket: WebSocket): + await websocket.accept() + + logger.debug("WebSocket connection accepted") + + # Reading Twilio data. + start_data = websocket.iter_text() + await start_data.__anext__() + call_data = json.loads(await start_data.__anext__()) + print(call_data, flush=True) + stream_sid = call_data["start"]["streamSid"] + call_sid = call_data["start"]["callSid"] + + # Create websocket transport and update params. 
+ params.add_wav_header = False + params.serializer = TwilioFrameSerializer( + stream_sid=stream_sid, + call_sid=call_sid, + account_sid=os.getenv("TWILIO_ACCOUNT_SID", ""), + auth_token=os.getenv("TWILIO_AUTH_TOKEN", ""), + ) + transport = FastAPIWebsocketTransport(websocket=websocket, params=params) + await run_example(transport, args, False) + + uvicorn.run(app, host=args.host, port=args.port) + + +def run_main( + run_example: Callable, + args: argparse.Namespace, + transport_params: Mapping[str, Callable] = {}, +): + if args.transport not in transport_params: + logger.error(f"Transport '{args.transport}' not supported by this example") + return + + params = transport_params[args.transport]() + match args.transport: + case "daily": + run_example_daily(run_example, args, params) + case "webrtc": + run_example_webrtc(run_example, args, params) + case "twilio": + run_example_twilio(run_example, args, params) + + +def main( + run_example: Callable, + *, + parser: Optional[argparse.ArgumentParser] = None, + transport_params: Mapping[str, Callable] = {}, +): if not parser: parser = argparse.ArgumentParser(description="Pipecat Bot Runner") - parser.add_argument("bot_file", nargs="?", help="Path to the bot file", default=None) parser.add_argument( "--host", default="localhost", help="Host for HTTP server (default: localhost)" ) parser.add_argument( "--port", type=int, default=7860, help="Port for HTTP server (default: 7860)" ) + parser.add_argument( + "--transport", + "-t", + type=str, + choices=["daily", "webrtc", "twilio"], + default="webrtc", + help="The transport this example should use", + ) + parser.add_argument( + "--proxy", "-x", help="A public proxy host name (no protocol, e.g. 
proxy.example.com)" + ) parser.add_argument("--verbose", "-v", action="count", default=0) args = parser.parse_args() + # Log level logger.remove(0) - if args.verbose: - logger.add(sys.stderr, level="TRACE") - else: - logger.add(sys.stderr, level="DEBUG") - - # Infer the bot file from the caller if not provided explicitly - bot_file = args.bot_file - if bot_file is None: - # Get the __file__ of the script that called main() - import inspect - - caller_frame = inspect.stack()[1] - caller_globals = caller_frame.frame.f_globals - bot_file = caller_globals.get("__file__") - - if not bot_file: - print("❌ Could not determine the bot file. Pass it explicitly to main().") - sys.exit(1) + logger.add(sys.stderr, level="TRACE" if args.verbose else "DEBUG") # Import the bot file - try: - global run_bot_func, bot_module, is_webrtc_bot - bot_module, run_bot_func, is_webrtc_bot = import_bot_file(bot_file) - logger.info(f"Successfully loaded bot from {bot_file}") - - if is_webrtc_bot: - logger.info("Detected WebRTC-compatible bot, starting web server...") - uvicorn.run(app, host=args.host, port=args.port) - else: - logger.info("Detected standalone bot, running directly...") - asyncio.run(run_standalone_bot()) - except Exception as e: - logger.error(f"Error loading bot file: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() + run_main(run_example, args, transport_params) diff --git a/examples/open-telemetry/jaeger/bot.py b/examples/open-telemetry/jaeger/bot.py index 18fe34ef4..0fb8bfd2e 100644 --- a/examples/open-telemetry/jaeger/bot.py +++ b/examples/open-telemetry/jaeger/bot.py @@ -6,7 +6,6 @@ import argparse import os -import sys from dotenv import load_dotenv from loguru import logger @@ -24,9 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from 
pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams from pipecat.utils.tracing.setup import setup_tracing load_dotenv(override=True) @@ -55,17 +53,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -143,19 +154,14 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - 
logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": - sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - from run import main + from ..run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/open-telemetry/langfuse/bot.py b/examples/open-telemetry/langfuse/bot.py index 9f311970e..eb1ed0748 100644 --- a/examples/open-telemetry/langfuse/bot.py +++ b/examples/open-telemetry/langfuse/bot.py @@ -6,7 +6,6 @@ import argparse import os -import sys from dotenv import load_dotenv from loguru import logger @@ -24,9 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.services.daily import DailyParams from pipecat.utils.tracing.setup import setup_tracing load_dotenv(override=True) @@ -52,17 +50,30 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) -async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace): - logger.info(f"Starting bot") +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} - transport = SmallWebRTCTransport( - webrtc_connection=webrtc_connection, - params=TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - ) + +async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool): + logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) @@ -140,19 +151,14 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") - - @transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info(f"Client closed connection") await task.cancel() - runner = PipelineRunner(handle_sigint=False) + runner = PipelineRunner(handle_sigint=handle_sigint) await runner.run(task) if __name__ == "__main__": - sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - from run import main + from ..run import main - main() + main(run_example, transport_params=transport_params) diff --git a/examples/open-telemetry/run.py b/examples/open-telemetry/run.py index e7012c9e9..101efc96a 100644 --- a/examples/open-telemetry/run.py +++ b/examples/open-telemetry/run.py @@ -6,200 +6,230 @@ import argparse import asyncio -import importlib.util +import json import os import sys from contextlib import asynccontextmanager -from inspect import iscoroutinefunction, signature -from typing import Any, Callable, Dict, Optional, Tuple +from 
typing import Callable, Dict, Mapping, Optional +import aiohttp import uvicorn +from daily_runner import configure from dotenv import load_dotenv -from fastapi import BackgroundTasks, FastAPI -from fastapi.responses import RedirectResponse +from fastapi import BackgroundTasks, FastAPI, WebSocket +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import HTMLResponse, RedirectResponse from loguru import logger -from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI +from pipecat.serializers.twilio import TwilioFrameSerializer +from pipecat.transports.base_transport import TransportParams +from pipecat.transports.network.fastapi_websocket import ( + FastAPIWebsocketParams, + FastAPIWebsocketTransport, +) +from pipecat.transports.network.small_webrtc import SmallWebRTCTransport from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection +from pipecat.transports.services.daily import DailyParams, DailyTransport # Load environment variables load_dotenv(override=True) -app = FastAPI() -# Store connections by pc_id -pcs_map: Dict[str, SmallWebRTCConnection] = {} +def run_example_daily( + run_example: Callable, + args: argparse.Namespace, + params: DailyParams, +): + logger.info("Running example with DailyTransport...") -ice_servers = [ - IceServer( - urls="stun:stun.l.google.com:19302", - ) -] + async def run(): + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) -# Mount the frontend at / -app.mount("/client", SmallWebRTCPrebuiltUI) + # Run example function with DailyTransport transport arguments. 
+ transport = DailyTransport(room_url, token, "Pipecat", params=params) + await run_example(transport, args, True) -# Store program arguments -args: argparse.Namespace = argparse.Namespace() - -# Store the bot module and function info -bot_module: Any = None -run_bot_func: Optional[Callable] = None -is_webrtc_bot: bool = True + asyncio.run(run()) -def import_bot_file(file_path: str) -> Tuple[Any, Callable, bool]: - """Dynamically import the bot file and determine how to run it. +def run_example_webrtc( + run_example: Callable, + args: argparse.Namespace, + params: TransportParams, +): + logger.info("Running example with SmallWebRTCTransport...") - Returns: - tuple: (module, run_function, is_webrtc_bot) - - module: The imported module - - run_function: Either run_bot or main function - - is_webrtc_bot: True if run_bot function exists and accepts a WebRTC connection - """ - if not os.path.exists(file_path): - raise FileNotFoundError(f"Bot file not found: {file_path}") + from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI - # Extract module name without extension - module_name = os.path.splitext(os.path.basename(file_path))[0] + app = FastAPI() - # Load the module - spec = importlib.util.spec_from_file_location(module_name, file_path) - if not spec or not spec.loader: - raise ImportError(f"Could not load spec for {file_path}") + # Store connections by pc_id + pcs_map: Dict[str, SmallWebRTCConnection] = {} - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - spec.loader.exec_module(module) - - # Check for run_bot function first - if hasattr(module, "run_bot"): - run_func = module.run_bot - # Check if the function accepts a WebRTC connection - sig = signature(run_func) - is_webrtc = len(sig.parameters) > 0 - return module, run_func, is_webrtc - - # Fall back to main function - if hasattr(module, "main") and iscoroutinefunction(module.main): - return module, module.main, False - - raise AttributeError(f"No run_bot 
or async main function found in {file_path}") - - -@app.get("/", include_in_schema=False) -async def root_redirect(): - return RedirectResponse(url="/client/") - - -@app.post("/api/offer") -async def offer(request: dict, background_tasks: BackgroundTasks): - global run_bot_func, is_webrtc_bot - - if not run_bot_func: - raise RuntimeError("No bot file has been loaded") - - if not is_webrtc_bot: - return { - "error": "This bot doesn't support WebRTC connections, it's running in standalone mode" - } - - pc_id = request.get("pc_id") - - if pc_id and pc_id in pcs_map: - pipecat_connection = pcs_map[pc_id] - logger.info(f"Reusing existing connection for pc_id: {pc_id}") - await pipecat_connection.renegotiate( - sdp=request["sdp"], type=request["type"], restart_pc=request.get("restart_pc", False) + ice_servers = [ + IceServer( + urls="stun:stun.l.google.com:19302", ) - else: - pipecat_connection = SmallWebRTCConnection(ice_servers) - await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"]) + ] - @pipecat_connection.event_handler("closed") - async def handle_disconnected(webrtc_connection: SmallWebRTCConnection): - logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}") - pcs_map.pop(webrtc_connection.pc_id, None) + # Mount the frontend at / + app.mount("/client", SmallWebRTCPrebuiltUI) - # We've already checked that run_bot_func exists - assert run_bot_func is not None - background_tasks.add_task(run_bot_func, pipecat_connection, args) + @app.get("/", include_in_schema=False) + async def root_redirect(): + return RedirectResponse(url="/client/") - answer = pipecat_connection.get_answer() - # Updating the peer connection inside the map - pcs_map[answer["pc_id"]] = pipecat_connection + @app.post("/api/offer") + async def offer(request: dict, background_tasks: BackgroundTasks): + pc_id = request.get("pc_id") - return answer + if pc_id and pc_id in pcs_map: + pipecat_connection = pcs_map[pc_id] + logger.info(f"Reusing existing 
connection for pc_id: {pc_id}") + await pipecat_connection.renegotiate( + sdp=request["sdp"], + type=request["type"], + restart_pc=request.get("restart_pc", False), + ) + else: + pipecat_connection = SmallWebRTCConnection(ice_servers) + await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"]) + + @pipecat_connection.event_handler("closed") + async def handle_disconnected(webrtc_connection: SmallWebRTCConnection): + logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}") + pcs_map.pop(webrtc_connection.pc_id, None) + + # Run example function with SmallWebRTC transport arguments. + transport = SmallWebRTCTransport(params=params, webrtc_connection=pipecat_connection) + background_tasks.add_task(run_example, transport, args, False) + + answer = pipecat_connection.get_answer() + # Updating the peer connection inside the map + pcs_map[answer["pc_id"]] = pipecat_connection + + return answer + + @asynccontextmanager + async def lifespan(app: FastAPI): + yield # Run app + coros = [pc.close() for pc in pcs_map.values()] + await asyncio.gather(*coros) + pcs_map.clear() + + uvicorn.run(app, host=args.host, port=args.port) -@asynccontextmanager -async def lifespan(app: FastAPI): - yield # Run app - coros = [pc.close() for pc in pcs_map.values()] - await asyncio.gather(*coros) - pcs_map.clear() +def run_example_twilio( + run_example: Callable, + args: argparse.Namespace, + params: FastAPIWebsocketParams, +): + logger.info("Running example with FastAPIWebsocketTransport (Twilio)...") + + app = FastAPI() + + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Allow all origins for testing + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + @app.post("/") + async def start_call(): + logger.debug("POST TwiML") + + xml_content = f""" + + + + + + + """ + return HTMLResponse(content=xml_content, media_type="application/xml") + + @app.websocket("/ws") + async def websocket_endpoint(websocket: WebSocket): 
+ await websocket.accept() + + logger.debug("WebSocket connection accepted") + + # Reading Twilio data. + start_data = websocket.iter_text() + await start_data.__anext__() + call_data = json.loads(await start_data.__anext__()) + print(call_data, flush=True) + stream_sid = call_data["start"]["streamSid"] + call_sid = call_data["start"]["callSid"] + + # Create websocket transport and update params. + params.add_wav_header = False + params.serializer = TwilioFrameSerializer( + stream_sid=stream_sid, + call_sid=call_sid, + account_sid=os.getenv("TWILIO_ACCOUNT_SID", ""), + auth_token=os.getenv("TWILIO_AUTH_TOKEN", ""), + ) + transport = FastAPIWebsocketTransport(websocket=websocket, params=params) + await run_example(transport, args, False) + + uvicorn.run(app, host=args.host, port=args.port) -async def run_standalone_bot() -> None: - """Run a standalone bot that doesn't require WebRTC""" - global run_bot_func - if run_bot_func is not None: - await run_bot_func() - else: - raise RuntimeError("No bot function available to run") +def run_main( + run_example: Callable, + args: argparse.Namespace, + transport_params: Mapping[str, Callable] = {}, +): + if args.transport not in transport_params: + logger.error(f"Transport '{args.transport}' not supported by this example") + return + + params = transport_params[args.transport]() + match args.transport: + case "daily": + run_example_daily(run_example, args, params) + case "webrtc": + run_example_webrtc(run_example, args, params) + case "twilio": + run_example_twilio(run_example, args, params) -def main(parser: Optional[argparse.ArgumentParser] = None): - global args - +def main( + run_example: Callable, + *, + parser: Optional[argparse.ArgumentParser] = None, + transport_params: Mapping[str, Callable] = {}, +): if not parser: parser = argparse.ArgumentParser(description="Pipecat Bot Runner") - parser.add_argument("bot_file", nargs="?", help="Path to the bot file", default=None) parser.add_argument( "--host", 
default="localhost", help="Host for HTTP server (default: localhost)" ) parser.add_argument( "--port", type=int, default=7860, help="Port for HTTP server (default: 7860)" ) + parser.add_argument( + "--transport", + "-t", + type=str, + choices=["daily", "webrtc", "twilio"], + default="webrtc", + help="The transport this example should use", + ) + parser.add_argument( + "--proxy", "-x", help="A public proxy host name (no protocol, e.g. proxy.example.com)" + ) parser.add_argument("--verbose", "-v", action="count", default=0) args = parser.parse_args() + # Log level logger.remove(0) - if args.verbose: - logger.add(sys.stderr, level="TRACE") - else: - logger.add(sys.stderr, level="DEBUG") - - # Infer the bot file from the caller if not provided explicitly - bot_file = args.bot_file - if bot_file is None: - # Get the __file__ of the script that called main() - import inspect - - caller_frame = inspect.stack()[1] - caller_globals = caller_frame.frame.f_globals - bot_file = caller_globals.get("__file__") - - if not bot_file: - print("❌ Could not determine the bot file. 
Pass it explicitly to main().") - sys.exit(1) + logger.add(sys.stderr, level="TRACE" if args.verbose else "DEBUG") # Import the bot file - try: - global run_bot_func, bot_module, is_webrtc_bot - bot_module, run_bot_func, is_webrtc_bot = import_bot_file(bot_file) - logger.info(f"Successfully loaded bot from {bot_file}") - - if is_webrtc_bot: - logger.info("Detected WebRTC-compatible bot, starting web server...") - uvicorn.run(app, host=args.host, port=args.port) - else: - logger.info("Detected standalone bot, running directly...") - asyncio.run(run_standalone_bot()) - except Exception as e: - logger.error(f"Error loading bot file: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() + run_main(run_example, args, transport_params) diff --git a/examples/p2p-webrtc/daily-interop-bridge/bot.py b/examples/p2p-webrtc/daily-interop-bridge/bot.py index 0e859b5a0..659d3fcef 100644 --- a/examples/p2p-webrtc/daily-interop-bridge/bot.py +++ b/examples/p2p-webrtc/daily-interop-bridge/bot.py @@ -112,10 +112,6 @@ async def run_bot(webrtc_connection): @pipecat_transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info("Pipecat Client disconnected") - - @pipecat_transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info("Pipecat Client closed") await task.cancel() runner = PipelineRunner(handle_sigint=False) diff --git a/examples/p2p-webrtc/video-transform/server/bot.py b/examples/p2p-webrtc/video-transform/server/bot.py index a6d885cea..44684d4ea 100644 --- a/examples/p2p-webrtc/video-transform/server/bot.py +++ b/examples/p2p-webrtc/video-transform/server/bot.py @@ -140,10 +140,6 @@ async def run_bot(webrtc_connection): @pipecat_transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info("Pipecat Client disconnected") - - @pipecat_transport.event_handler("on_client_closed") - async def on_client_closed(transport, 
client): - logger.info("Pipecat Client closed") await task.cancel() runner = PipelineRunner(handle_sigint=False) diff --git a/examples/p2p-webrtc/voice-agent/bot.py b/examples/p2p-webrtc/voice-agent/bot.py index 4cda32a59..505a768e6 100644 --- a/examples/p2p-webrtc/voice-agent/bot.py +++ b/examples/p2p-webrtc/voice-agent/bot.py @@ -86,10 +86,6 @@ async def run_bot(webrtc_connection): @pipecat_transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info("Pipecat Client disconnected") - - @pipecat_transport.event_handler("on_client_closed") - async def on_client_closed(transport, client): - logger.info("Pipecat Client closed") await task.cancel() runner = PipelineRunner(handle_sigint=False) diff --git a/scripts/release/eval.py b/scripts/release/eval.py new file mode 100644 index 000000000..5ad2bb8f3 --- /dev/null +++ b/scripts/release/eval.py @@ -0,0 +1,290 @@ +# +# Copyright (c) 2024-2025 Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import argparse +import asyncio +import io +import os +import re +import sys +import time +import wave +from datetime import datetime +from pathlib import Path +from typing import List, Optional + +import aiofiles +from loguru import logger +from utils import ( + EvalResult, + load_module_from_path, + print_begin_test, + print_end_test, + print_test_results, +) + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.frames.frames import EndTaskFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.processors.audio.audio_buffer_processor import 
AudioBufferProcessor +from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport + +SCRIPT_DIR = Path(__file__).resolve().parent + +FOUNDATIONAL_DIR = SCRIPT_DIR.parent.parent / "examples" / "foundational" + +sys.path.insert(0, os.path.abspath(FOUNDATIONAL_DIR)) + +EVAL_PROMPT = "" + +PIPELINE_IDLE_TIMEOUT_SECS = 30 + + +class EvalRunner: + def __init__( + self, + *, + pattern: str = "", + record_audio: bool = False, + name: Optional[str] = None, + log_level: str = "DEBUG", + ): + self._pattern = f".*{pattern}.*" if pattern else "" + self._record_audio = record_audio + self._log_level = log_level + self._total_success = 0 + self._tests: List[EvalResult] = [] + self._queue = asyncio.Queue() + + # Where to save runner files. 
+ name = name or f"{datetime.now().strftime('%Y%m%d_%H%M%S')}" + self._runs_dir = os.path.join(SCRIPT_DIR, "test-runs", name) + self._logs_dir = os.path.join(self._runs_dir, "logs") + self._recordings_dir = os.path.join(self._runs_dir, "recordings") + os.makedirs(self._logs_dir, exist_ok=True) + os.makedirs(self._recordings_dir, exist_ok=True) + + async def assert_eval(self, params: FunctionCallParams): + reasoning = params.arguments["reasoning"] + logger.debug(f"🧠 EVAL REASONING: {reasoning}") + await self._queue.put(params.arguments["result"]) + await params.result_callback(None) + await params.llm.push_frame(EndTaskFrame(), FrameDirection.UPSTREAM) + + async def assert_eval_false(self): + await self._queue.put(False) + + async def run_eval(self, example_file: str, prompt: str, eval: Optional[str] = None): + if not re.match(self._pattern, example_file): + return + + # Store logs + filename = self._log_file_name(example_file) + log_file_id = logger.add(filename, level=self._log_level) + + print_begin_test(example_file) + + start_time = time.time() + + try: + tasks = [ + asyncio.create_task(run_example_pipeline(example_file)), + asyncio.create_task(run_eval_pipeline(self, example_file, prompt, eval)), + ] + _, pending = await asyncio.wait(tasks, timeout=90) + for task in pending: + task.cancel() # asyncio.wait() never cancels on timeout; stop leftover pipelines so they don't leak into the next eval + except asyncio.CancelledError: + pass + except Exception as e: + print(f"ERROR: Unable to run {example_file}: {e}") + + try: + result = await asyncio.wait_for(self._queue.get(), timeout=1.0) + except asyncio.TimeoutError: + result = False + + if result: + self._total_success += 1 + + eval_time = time.time() - start_time + + self._tests.append(EvalResult(name=example_file, result=result, time=eval_time)) + + print_end_test(example_file, result, eval_time) + + logger.remove(log_file_id) + + def print_results(self): + print_test_results(self._tests, self._total_success, self._runs_dir) + + async def save_audio(self, name: str, audio: bytes, sample_rate: int, num_channels: int): + if len(audio) > 0: + filename = 
self._recording_file_name(name) + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wf: + wf.setsampwidth(2) + wf.setnchannels(num_channels) + wf.setframerate(sample_rate) + wf.writeframes(audio) + async with aiofiles.open(filename, "wb") as file: + await file.write(buffer.getvalue()) + logger.debug(f"Saving {name} audio to {filename}") + else: + logger.warning(f"There's no audio to save for {name}") + + def _base_file_name(self, example_file: str): + base_name = os.path.splitext(example_file)[0] + return f"{base_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + def _log_file_name(self, example_file: str): + base_name = self._base_file_name(example_file) + return os.path.join(self._logs_dir, f"{base_name}.log") + + def _recording_file_name(self, example_file: str): + base_name = self._base_file_name(example_file) + return os.path.join(self._recordings_dir, f"{base_name}.wav") + + +async def run_example_pipeline(example_file: str): + room_url = os.getenv("DAILY_SAMPLE_ROOM_URL") + + script_path = FOUNDATIONAL_DIR / example_file + + module = load_module_from_path(script_path) + + transport = DailyTransport( + room_url, + None, + "Pipecat", + DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + ) + + await module.run_example(transport, argparse.Namespace(), True) + + +async def run_eval_pipeline( + eval_runner: EvalRunner, example_file: str, prompt: str, eval: Optional[str] +): + logger.info(f"Starting eval bot") + + room_url = os.getenv("DAILY_SAMPLE_ROOM_URL") + + transport = DailyTransport( + room_url, + None, + "Pipecat Eval", + DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=2.0)), + ), + ) + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + llm.register_function("assert_eval", eval_runner.assert_eval) + + eval_function = FunctionSchema( + name="assert_eval", + description="Called when the user answers a question.", + properties={ + "result": { + "type": "boolean", + "description": "The result of the eval", + }, + "reasoning": { + "type": "string", + "description": "Why the answer was considered correct or invalid", + }, + }, + required=["result", "reasoning"], + ) + tools = ToolsSchema(standard_tools=[eval_function]) + + # See if we need to include an eval prompt. + eval_prompt = "" + if eval: + eval_prompt = f"The answer is correct if the user says [{eval}]." + + messages = [ + { + "role": "system", + "content": f"You are an LLM eval, be extremly brief. Your goal is to only ask one question: {prompt}. Call the eval function only if the user answers the question and check if the answer is correct (words as numbers are valid). {eval_prompt}", + }, + ] + + context = OpenAILLMContext(messages, tools) + context_aggregator = llm.create_context_aggregator(context) + + audio_buffer = AudioBufferProcessor() + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, # STT + context_aggregator.user(), # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + audio_buffer, + context_aggregator.assistant(), # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + allow_interruptions=True, + audio_in_sample_rate=16000, + audio_out_sample_rate=16000, + ), + idle_timeout_secs=PIPELINE_IDLE_TIMEOUT_SECS, + ) + + @audio_buffer.event_handler("on_audio_data") + async def on_audio_data(buffer, audio, sample_rate, num_channels): + await eval_runner.save_audio(example_file, audio, sample_rate, num_channels) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await 
audio_buffer.start_recording() + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + @task.event_handler("on_idle_timeout") + async def on_pipeline_idle_timeout(task): + await eval_runner.assert_eval_false() + + runner = PipelineRunner() + + await runner.run(task) diff --git a/scripts/release/run-release-evals.py b/scripts/release/run-release-evals.py new file mode 100644 index 000000000..33511fdeb --- /dev/null +++ b/scripts/release/run-release-evals.py @@ -0,0 +1,147 @@ +# +# Copyright (c) 2024-2025 Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import argparse +import asyncio +import sys +from datetime import datetime, timezone + +from dotenv import load_dotenv +from eval import EvalRunner +from loguru import logger +from utils import check_env_variables + +load_dotenv(override=True) + +# Math +PROMPT_SIMPLE_MATH = "A simple math addition." + +# Weather +PROMPT_WEATHER = "What's the weather in San Francisco?" +EVAL_WEATHER = ( + "Something specific about the current weather in San Francisco, including the degrees." +) + +# Online search +PROMPT_ONLINE_SEARCH = "What's the date right now in London?" +EVAL_ONLINE_SEARCH = f"Today is {datetime.now(timezone.utc).strftime('%B %d, %Y')}." 
+ +TESTS_07 = [ + # 07 series + ("07-interruptible.py", PROMPT_SIMPLE_MATH, None), + ("07-interruptible-cartesia-http.py", PROMPT_SIMPLE_MATH, None), + ("07b-interruptible-langchain.py", PROMPT_SIMPLE_MATH, None), + ("07c-interruptible-deepgram.py", PROMPT_SIMPLE_MATH, None), + ("07d-interruptible-elevenlabs.py", PROMPT_SIMPLE_MATH, None), + ("07d-interruptible-elevenlabs-http.py", PROMPT_SIMPLE_MATH, None), + ("07e-interruptible-playht.py", PROMPT_SIMPLE_MATH, None), + ("07e-interruptible-playht-http.py", PROMPT_SIMPLE_MATH, None), + ("07f-interruptible-azure.py", PROMPT_SIMPLE_MATH, None), + ("07g-interruptible-openai.py", PROMPT_SIMPLE_MATH, None), + ("07h-interruptible-openpipe.py", PROMPT_SIMPLE_MATH, None), + ("07j-interruptible-gladia.py", PROMPT_SIMPLE_MATH, None), + ("07k-interruptible-lmnt.py", PROMPT_SIMPLE_MATH, None), + ("07l-interruptible-groq.py", PROMPT_SIMPLE_MATH, None), + ("07m-interruptible-aws.py", PROMPT_SIMPLE_MATH, None), + ("07n-interruptible-google.py", PROMPT_SIMPLE_MATH, None), + ("07o-interruptible-assemblyai.py", PROMPT_SIMPLE_MATH, None), + ("07q-interruptible-rime.py", PROMPT_SIMPLE_MATH, None), + ("07q-interruptible-rime-http.py", PROMPT_SIMPLE_MATH, None), + ("07r-interruptible-riva-nim.py", PROMPT_SIMPLE_MATH, None), + ("07s-interruptible-google-audio-in.py", PROMPT_SIMPLE_MATH, None), + ("07t-interruptible-fish.py", PROMPT_SIMPLE_MATH, None), + ("07v-interruptible-neuphonic.py", PROMPT_SIMPLE_MATH, None), + ("07v-interruptible-neuphonic-http.py", PROMPT_SIMPLE_MATH, None), + ("07w-interruptible-fal.py", PROMPT_SIMPLE_MATH, None), + ("07y-interruptible-minimax.py", PROMPT_SIMPLE_MATH, None), + ("07z-interruptible-sarvam.py", PROMPT_SIMPLE_MATH, None), + # Needs a local XTTS docker instance running. + # ("07i-interruptible-xtts.py", PROMPT_SIMPLE_MATH, None), + # Needs a Krisp license. + # ("07p-interruptible-krisp.py", PROMPT_SIMPLE_MATH, None), + # Needs GPU resources. 
+ # ("07u-interruptible-ultravox.py", PROMPT_SIMPLE_MATH, None), +] + +TESTS_14 = [ + ("14-function-calling.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14a-function-calling-anthropic.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14b-function-calling-anthropic-video.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14d-function-calling-video.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14e-function-calling-gemini.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14f-function-calling-groq.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14g-function-calling-grok.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14h-function-calling-azure.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14i-function-calling-fireworks.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14j-function-calling-nim.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14n-function-calling-perplexity.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14q-function-calling-qwen.py", PROMPT_WEATHER, EVAL_WEATHER), + ("14r-function-calling-aws.py", PROMPT_WEATHER, EVAL_WEATHER), + # Currently not working. + # ("14c-function-calling-together.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14j-function-calling-nim.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14k-function-calling-cerebras.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14l-function-calling-deepseek.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14m-function-calling-openrouter.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14o-function-calling-gemini-openai-format.py", PROMPT_WEATHER, EVAL_WEATHER), + # ("14p-function-calling-gemini-vertex-ai.py", PROMPT_WEATHER, EVAL_WEATHER), +] + +TESTS_19 = [ + ("19-openai-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER), + ("19a-azure-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER), +] + +TESTS_26 = [ + ("26-gemini-multimodal-live.py", PROMPT_SIMPLE_MATH, None), + ("26a-gemini-multimodal-live-transcription.py", PROMPT_SIMPLE_MATH, None), + ("26b-gemini-multimodal-live-function-calling.py", PROMPT_WEATHER, EVAL_WEATHER), + ("26c-gemini-multimodal-live-video.py", PROMPT_SIMPLE_MATH, None), + 
("26e-gemini-multimodal-google-search.py", PROMPT_ONLINE_SEARCH, EVAL_ONLINE_SEARCH), + # Currently not working. + # ("26d-gemini-multimodal-live-text.py", PROMPT_SIMPLE_MATH, None), +] + +TESTS = [ + *TESTS_07, + *TESTS_14, + *TESTS_19, + *TESTS_26, +] + + +async def main(args: argparse.Namespace): + if not check_env_variables(): + return + + # Log level + logger.remove(0) + log_level = "TRACE" if args.verbose >= 2 else "DEBUG" + if args.verbose: + logger.add(sys.stderr, level=log_level) + + runner = EvalRunner( + name=args.name, + pattern=args.pattern, + record_audio=args.audio, + log_level=log_level, + ) + + for test, prompt, eval in TESTS: + await runner.run_eval(test, prompt, eval) + + runner.print_results() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Pipecat Eval Runner") + parser.add_argument("--audio", "-a", action="store_true", help="Record audio for each test") + parser.add_argument("--name", "-n", help="Name for the current runner (e.g. 'v.0.0.68')") + parser.add_argument("--pattern", "-p", help="Only run tests that match the pattern") + parser.add_argument("--verbose", "-v", action="count", default=0) + args = parser.parse_args() + + asyncio.run(main(args)) diff --git a/scripts/release/utils.py b/scripts/release/utils.py new file mode 100644 index 000000000..768568d73 --- /dev/null +++ b/scripts/release/utils.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2024-2025 Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import importlib.util +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Sequence + +GREEN = "\033[92m" +RED = "\033[91m" +RESET = "\033[0m" +CLEAR = "\033[K" + + +@dataclass +class EvalResult: + name: str + result: bool + time: float + + +def check_env_variables() -> bool: + required_envs = [ + "CARTESIA_API_KEY", + "DEEPGRAM_API_KEY", + "OPENAI_API_KEY", + "DAILY_SAMPLE_ROOM_URL", + ] + for env in required_envs: + if not os.getenv(env): + print(f"\nERROR: 
Environment variable {env} is not defined.\n") + print(f"Required environment variables: {required_envs}") + return False + return True + + +def print_begin_test(example_file: str): + print(f"{example_file:<55} RUNNING...{CLEAR}", end="\r", flush=True) + + +def print_end_test(example_file: str, passed: bool, time: float): + status = f"{GREEN}✅ OK{RESET}" if passed else f"{RED}❌ FAILED{RESET}" + print(f"{example_file:<55} {status} ({time:.2f}s){CLEAR}") + + +def print_test_results(tests: Sequence[EvalResult], total_success: int, location: str): + total_count = len(tests) + + bar = "=" * 80 + + print() + print(f"{GREEN}{bar}{RESET}") + print(f"TOTAL NUMBER OF TESTS: {total_count}") + print() + + total_time = 0.0 + total_count = len(tests) + for eval in tests: + total_time += eval.time + print_end_test(eval.name, eval.result, eval.time) + + total_fail = total_count - total_success + + print() + print( + f"{GREEN}SUCCESS{RESET}: {total_success} | {RED}FAIL{RESET}: {total_fail} | TOTAL TIME: {total_time:.2f}s" + ) + print(f"{GREEN}{bar}{RESET}") + print() + print(f"Tests output: {location}") + + +def load_module_from_path(path: str | Path): + path = Path(path).resolve() + module_name = path.stem + + spec = importlib.util.spec_from_file_location(module_name, str(path)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 692f325d9..0fe330655 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -236,6 +236,7 @@ class PipelineTask(BaseTask): ) observers.append(self._turn_trace_observer) self._finished = False + self._cancelled = False # This queue receives frames coming from the pipeline upstream. 
self._up_queue = asyncio.Queue() @@ -346,7 +347,6 @@ class PipelineTask(BaseTask): async def cancel(self): """Stops the running pipeline immediately.""" - logger.debug(f"Canceling pipeline task {self}") await self._cancel() async def run(self): @@ -406,12 +406,15 @@ class PipelineTask(BaseTask): await self.queue_frame(frame) async def _cancel(self): - # Make sure everything is cleaned up downstream. This is sent - # out-of-band from the main streaming task which is what we want since - # we want to cancel right away. - await self._source.push_frame(CancelFrame()) - # Only cancel the push task. Everything else will be cancelled in run(). - await self._task_manager.cancel_task(self._process_push_task) + if not self._cancelled: + logger.debug(f"Canceling pipeline task {self}") + self._cancelled = True + # Make sure everything is cleaned up downstream. This is sent + # out-of-band from the main streaming task which is what we want since + # we want to cancel right away. + await self._source.push_frame(CancelFrame()) + # Only cancel the push task. Everything else will be cancelled in run(). 
+ await self._task_manager.cancel_task(self._process_push_task) async def _create_tasks(self): self._process_up_task = self._task_manager.create_task( diff --git a/src/pipecat/transports/network/fastapi_websocket.py b/src/pipecat/transports/network/fastapi_websocket.py index f04d56b0d..a6679dd7e 100644 --- a/src/pipecat/transports/network/fastapi_websocket.py +++ b/src/pipecat/transports/network/fastapi_websocket.py @@ -26,7 +26,7 @@ from pipecat.frames.frames import ( TransportMessageFrame, TransportMessageUrgentFrame, ) -from pipecat.processors.frame_processor import FrameDirection +from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType from pipecat.transports.base_input import BaseInputTransport from pipecat.transports.base_output import BaseOutputTransport @@ -45,7 +45,7 @@ except ModuleNotFoundError as e: class FastAPIWebsocketParams(TransportParams): add_wav_header: bool = False - serializer: FrameSerializer + serializer: Optional[FrameSerializer] = None session_timeout: Optional[int] = None @@ -125,7 +125,8 @@ class FastAPIWebsocketInputTransport(BaseInputTransport): async def start(self, frame: StartFrame): await super().start(frame) await self._client.setup(frame) - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) if not self._monitor_websocket_task and self._params.session_timeout: self._monitor_websocket_task = self.create_task(self._monitor_websocket()) await self._client.trigger_client_connected() @@ -158,6 +159,9 @@ class FastAPIWebsocketInputTransport(BaseInputTransport): async def _receive_messages(self): try: async for message in self._client.receive(): + if not self._params.serializer: + continue + frame = await self._params.serializer.deserialize(message) if not frame: @@ -203,7 +207,8 @@ class FastAPIWebsocketOutputTransport(BaseOutputTransport): async def 
start(self, frame: StartFrame): await super().start(frame) await self._client.setup(frame) - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 await self.set_transport_ready(frame) @@ -266,6 +271,9 @@ class FastAPIWebsocketOutputTransport(BaseOutputTransport): await self._write_audio_sleep() async def _write_frame(self, frame: Frame): + if not self._params.serializer: + return + try: payload = await self._params.serializer.serialize(frame) if payload: @@ -302,7 +310,9 @@ class FastAPIWebsocketTransport(BaseTransport): on_session_timeout=self._on_session_timeout, ) - is_binary = self._params.serializer.type == FrameSerializerType.BINARY + is_binary = False + if self._params.serializer: + is_binary = self._params.serializer.type == FrameSerializerType.BINARY self._client = FastAPIWebsocketClient(websocket, is_binary, self._callbacks) self._input = FastAPIWebsocketInputTransport( diff --git a/src/pipecat/transports/network/small_webrtc.py b/src/pipecat/transports/network/small_webrtc.py index ffa3f441a..92cc2f3d4 100644 --- a/src/pipecat/transports/network/small_webrtc.py +++ b/src/pipecat/transports/network/small_webrtc.py @@ -50,7 +50,6 @@ class SmallWebRTCCallbacks(BaseModel): on_app_message: Callable[[Any], Awaitable[None]] on_client_connected: Callable[[SmallWebRTCConnection], Awaitable[None]] on_client_disconnected: Callable[[SmallWebRTCConnection], Awaitable[None]] - on_client_closed: Callable[[SmallWebRTCConnection], Awaitable[None]] class RawAudioTrack(AudioStreamTrack): @@ -169,7 +168,7 @@ class SmallWebRTCClient: @self._webrtc_connection.event_handler("disconnected") async def on_disconnected(connection: SmallWebRTCConnection): logger.debug("Peer connection lost.") - await self._handle_client_disconnected() + await self._handle_peer_disconnected() @self._webrtc_connection.event_handler("closed") async def 
on_closed(connection: SmallWebRTCConnection): @@ -313,7 +312,7 @@ class SmallWebRTCClient: logger.info(f"Disconnecting to Small WebRTC") self._closing = True await self._webrtc_connection.disconnect() - await self._handle_client_disconnected() + await self._handle_peer_disconnected() async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): if self._can_send(): @@ -338,19 +337,18 @@ class SmallWebRTCClient: await self._callbacks.on_client_connected(self._webrtc_connection) - async def _handle_client_disconnected(self): + async def _handle_peer_disconnected(self): self._audio_input_track = None self._video_input_track = None self._audio_output_track = None self._video_output_track = None - await self._callbacks.on_client_disconnected(self._webrtc_connection) async def _handle_client_closed(self): self._audio_input_track = None self._video_input_track = None self._audio_output_track = None self._video_output_track = None - await self._callbacks.on_client_closed(self._webrtc_connection) + await self._callbacks.on_client_disconnected(self._webrtc_connection) async def _handle_app_message(self, message: Any): await self._callbacks.on_app_message(message) @@ -525,7 +523,6 @@ class SmallWebRTCTransport(BaseTransport): on_app_message=self._on_app_message, on_client_connected=self._on_client_connected, on_client_disconnected=self._on_client_disconnected, - on_client_closed=self._on_client_closed, ) self._client = SmallWebRTCClient(webrtc_connection, self._callbacks) @@ -538,7 +535,6 @@ class SmallWebRTCTransport(BaseTransport): self._register_event_handler("on_app_message") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") - self._register_event_handler("on_client_closed") def input(self) -> SmallWebRTCInputTransport: if not self._input: @@ -572,6 +568,3 @@ class SmallWebRTCTransport(BaseTransport): async def _on_client_disconnected(self, webrtc_connection): await 
self._call_event_handler("on_client_disconnected", webrtc_connection) - - async def _on_client_closed(self, webrtc_connection): - await self._call_event_handler("on_client_closed", webrtc_connection) diff --git a/src/pipecat/transports/network/websocket_client.py b/src/pipecat/transports/network/websocket_client.py index 693be54c9..23a291784 100644 --- a/src/pipecat/transports/network/websocket_client.py +++ b/src/pipecat/transports/network/websocket_client.py @@ -34,7 +34,7 @@ from pipecat.utils.asyncio import BaseTaskManager class WebsocketClientParams(TransportParams): add_wav_header: bool = True - serializer: FrameSerializer = ProtobufFrameSerializer() + serializer: Optional[FrameSerializer] = None class WebsocketClientCallbacks(BaseModel): @@ -133,7 +133,8 @@ class WebsocketClientInputTransport(BaseInputTransport): async def start(self, frame: StartFrame): await super().start(frame) - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) await self._session.setup(frame) await self._session.connect() await self.set_transport_ready(frame) @@ -151,6 +152,8 @@ class WebsocketClientInputTransport(BaseInputTransport): await self._transport.cleanup() async def on_message(self, websocket, message): + if not self._params.serializer: + return frame = await self._params.serializer.deserialize(message) if not frame: return @@ -184,7 +187,8 @@ class WebsocketClientOutputTransport(BaseOutputTransport): async def start(self, frame: StartFrame): await super().start(frame) self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) await self._session.setup(frame) await self._session.connect() await self.set_transport_ready(frame) @@ -231,6 +235,8 @@ class WebsocketClientOutputTransport(BaseOutputTransport): await self._write_audio_sleep() async def _write_frame(self, frame: Frame): + if 
not self._params.serializer: + return payload = await self._params.serializer.serialize(frame) if payload: await self._session.send(payload) @@ -255,6 +261,7 @@ class WebsocketClientTransport(BaseTransport): super().__init__() self._params = params or WebsocketClientParams() + self._params.serializer = self._params.serializer or ProtobufFrameSerializer() callbacks = WebsocketClientCallbacks( on_connected=self._on_connected, diff --git a/src/pipecat/transports/network/websocket_server.py b/src/pipecat/transports/network/websocket_server.py index 7c8738871..095858f47 100644 --- a/src/pipecat/transports/network/websocket_server.py +++ b/src/pipecat/transports/network/websocket_server.py @@ -40,7 +40,7 @@ except ModuleNotFoundError as e: class WebsocketServerParams(TransportParams): add_wav_header: bool = False - serializer: FrameSerializer + serializer: Optional[FrameSerializer] = None session_timeout: Optional[int] = None @@ -80,7 +80,8 @@ class WebsocketServerInputTransport(BaseInputTransport): async def start(self, frame: StartFrame): await super().start(frame) - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) if not self._server_task: self._server_task = self.create_task(self._server_task_handler()) await self.set_transport_ready(frame) @@ -134,6 +135,9 @@ class WebsocketServerInputTransport(BaseInputTransport): # Handle incoming messages try: async for message in websocket: + if not self._params.serializer: + continue + frame = await self._params.serializer.deserialize(message) if not frame: @@ -194,7 +198,8 @@ class WebsocketServerOutputTransport(BaseOutputTransport): async def start(self, frame: StartFrame): await super().start(frame) - await self._params.serializer.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 await self.set_transport_ready(frame) @@ -252,6 +257,9 @@ class 
WebsocketServerOutputTransport(BaseOutputTransport): await self._write_audio_sleep() async def _write_frame(self, frame: Frame): + if not self._params.serializer: + return + try: payload = await self._params.serializer.serialize(frame) if payload and self._websocket: diff --git a/src/pipecat/transports/services/daily.py b/src/pipecat/transports/services/daily.py index 7fe3ed0dd..03d9e14ca 100644 --- a/src/pipecat/transports/services/daily.py +++ b/src/pipecat/transports/services/daily.py @@ -695,7 +695,9 @@ class DailyTransportClient(EventHandler): self._audio_renderers.setdefault(participant_id, {})[audio_source] = callback - logger.info(f"Starting to capture [{audio_source}] audio from participant {participant_id}") + logger.debug( + f"Starting to capture [{audio_source}] audio from participant {participant_id}" + ) self._client.set_audio_renderer( participant_id, @@ -723,6 +725,10 @@ class DailyTransportClient(EventHandler): self._video_renderers.setdefault(participant_id, {})[video_source] = callback + logger.debug( + f"Starting to capture [{video_source}] video from participant {participant_id}" + ) + self._client.set_video_renderer( participant_id, self._video_frame_received, @@ -1106,7 +1112,7 @@ class DailyInputTransport(BaseInputTransport): next_time = prev_time + 1 / framerate render_frame = (next_time - curr_time) < 0.1 - elif self._video_renderers[participant_id][video_source]["render_next_frame"]: + if self._video_renderers[participant_id][video_source]["render_next_frame"]: request_frame = self._video_renderers[participant_id][video_source][ "render_next_frame" ].pop(0) diff --git a/src/pipecat/utils/base_object.py b/src/pipecat/utils/base_object.py index 49705a899..03b42ade0 100644 --- a/src/pipecat/utils/base_object.py +++ b/src/pipecat/utils/base_object.py @@ -49,14 +49,16 @@ class BaseObject(ABC): return decorator def add_event_handler(self, event_name: str, handler): - if event_name not in self._event_handlers: - raise Exception(f"Event handler 
{event_name} not registered") - self._event_handlers[event_name].append(handler) + if event_name in self._event_handlers: + self._event_handlers[event_name].append(handler) + else: + logger.warning(f"Event handler {event_name} not registered") def _register_event_handler(self, event_name: str): - if event_name in self._event_handlers: - raise Exception(f"Event handler {event_name} already registered") - self._event_handlers[event_name] = [] + if event_name not in self._event_handlers: + self._event_handlers[event_name] = [] + else: # duplicate registration is tolerated: keep the existing handler list and only warn + logger.warning(f"Event handler {event_name} already registered") async def _call_event_handler(self, event_name: str, *args, **kwargs): # If we haven't registered an event handler, we don't need to do