From aeb9f1ffca3ccb49311187ca73eb51cceae0d963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 2 Sep 2025 15:03:21 -0700 Subject: [PATCH] transports: reorganize module --- CHANGELOG.md | 14 + .../foundational/01-say-one-thing-piper.py | 4 +- .../foundational/01-say-one-thing-rime.py | 4 +- examples/foundational/01-say-one-thing.py | 4 +- examples/foundational/01b-livekit-audio.py | 2 +- examples/foundational/01c-fastpitch.py | 4 +- examples/foundational/02-llm-say-one-thing.py | 4 +- examples/foundational/03-still-frame.py | 2 +- .../foundational/03b-still-frame-imagen.py | 2 +- .../04-transports-small-webrtc.py | 4 +- examples/foundational/04a-transports-daily.py | 2 +- .../foundational/04b-transports-livekit.py | 2 +- .../foundational/05-sync-speech-and-image.py | 2 +- .../foundational/06-listen-and-respond.py | 4 +- examples/foundational/06a-image-sync.py | 2 +- .../07-interruptible-cartesia-http.py | 4 +- examples/foundational/07-interruptible.py | 4 +- .../07a-interruptible-speechmatics-vad.py | 4 +- .../07a-interruptible-speechmatics.py | 4 +- .../foundational/07aa-interruptible-soniox.py | 4 +- .../07ab-interruptible-inworld-http.py | 4 +- .../07ac-interruptible-asyncai-http.py | 4 +- .../07ac-interruptible-asyncai.py | 4 +- .../07ad-interruptible-aicoustics.py | 4 +- .../07b-interruptible-langchain.py | 4 +- .../07c-interruptible-deepgram-vad.py | 4 +- .../07c-interruptible-deepgram.py | 4 +- .../07d-interruptible-elevenlabs-http.py | 4 +- .../07d-interruptible-elevenlabs.py | 4 +- .../07e-interruptible-playht-http.py | 4 +- .../foundational/07e-interruptible-playht.py | 4 +- .../foundational/07f-interruptible-azure.py | 4 +- .../foundational/07g-interruptible-openai.py | 4 +- .../07h-interruptible-openpipe.py | 4 +- .../foundational/07i-interruptible-xtts.py | 4 +- .../foundational/07j-interruptible-gladia.py | 4 +- .../foundational/07k-interruptible-lmnt.py | 4 +- .../foundational/07l-interruptible-groq.py | 4 +- 
.../foundational/07m-interruptible-aws.py | 4 +- .../foundational/07n-interruptible-gemini.py | 4 +- .../foundational/07n-interruptible-google.py | 4 +- .../07o-interruptible-assemblyai.py | 4 +- .../foundational/07p-interruptible-krisp.py | 4 +- .../07q-interruptible-rime-http.py | 4 +- .../foundational/07q-interruptible-rime.py | 4 +- .../07r-interruptible-riva-nim.py | 4 +- .../07s-interruptible-google-audio-in.py | 4 +- .../foundational/07t-interruptible-fish.py | 4 +- .../07u-interruptible-ultravox.py | 4 +- .../07v-interruptible-neuphonic-http.py | 4 +- .../07v-interruptible-neuphonic.py | 4 +- .../foundational/07w-interruptible-fal.py | 4 +- .../foundational/07y-interruptible-minimax.py | 4 +- .../07z-interruptible-sarvam-http.py | 4 +- .../foundational/07z-interruptible-sarvam.py | 4 +- examples/foundational/08-bots-arguing.py | 2 +- examples/foundational/09-mirror.py | 2 +- examples/foundational/09a-local-mirror.py | 2 +- examples/foundational/10-wake-phrase.py | 4 +- examples/foundational/11-sound-effects.py | 4 +- examples/foundational/12-describe-video.py | 2 +- .../12a-describe-video-gemini-flash.py | 2 +- .../foundational/12b-describe-video-gpt-4o.py | 2 +- .../12c-describe-video-anthropic.py | 2 +- .../foundational/13-whisper-transcription.py | 4 +- .../13b-deepgram-transcription.py | 4 +- .../foundational/13c-gladia-transcription.py | 4 +- .../foundational/13c-gladia-translation.py | 4 +- .../13d-assemblyai-transcription.py | 4 +- examples/foundational/13e-whisper-mlx.py | 4 +- .../13f-cartesia-transcription.py | 4 +- .../13g-sambanova-transcription.py | 4 +- .../13h-speechmatics-transcription.py | 4 +- .../foundational/13i-soniox-transcription.py | 4 +- .../foundational/13j-azure-transcription.py | 4 +- examples/foundational/14-function-calling.py | 4 +- .../14a-function-calling-anthropic.py | 4 +- .../14b-function-calling-anthropic-video.py | 2 +- .../14c-function-calling-together.py | 4 +- .../14d-function-calling-video.py | 2 +- 
.../14e-function-calling-google.py | 2 +- .../foundational/14f-function-calling-groq.py | 4 +- .../foundational/14g-function-calling-grok.py | 4 +- .../14h-function-calling-azure.py | 4 +- .../14i-function-calling-fireworks.py | 4 +- .../foundational/14j-function-calling-nim.py | 4 +- .../14k-function-calling-cerebras.py | 4 +- .../14l-function-calling-deepseek.py | 4 +- .../14m-function-calling-openrouter.py | 4 +- .../14n-function-calling-perplexity.py | 4 +- ...o-function-calling-gemini-openai-format.py | 4 +- .../14p-function-calling-gemini-vertex-ai.py | 4 +- .../foundational/14q-function-calling-qwen.py | 4 +- .../foundational/14r-function-calling-aws.py | 4 +- .../14s-function-calling-sambanova.py | 4 +- .../14t-function-calling-direct.py | 4 +- .../14u-function-calling-ollama.py | 4 +- .../14v-function-calling-openai.py | 4 +- .../14w-function-calling-mistral.py | 4 +- .../14x-function-calling-universal-context.py | 4 +- ...nction-calling-google-universal-context.py | 2 +- examples/foundational/15-switch-voices.py | 4 +- examples/foundational/15a-switch-languages.py | 4 +- .../16-gpu-container-local-bot.py | 4 +- examples/foundational/17-detect-user-idle.py | 4 +- examples/foundational/18-gstreamer-filesrc.py | 2 +- .../18a-gstreamer-videotestsrc.py | 2 +- .../foundational/19-openai-realtime-beta.py | 4 +- .../foundational/19a-azure-realtime-beta.py | 4 +- .../19b-openai-realtime-beta-text.py | 4 +- .../20a-persistent-context-openai.py | 4 +- .../20b-persistent-context-openai-realtime.py | 4 +- .../20c-persistent-context-anthropic.py | 4 +- .../20d-persistent-context-gemini.py | 2 +- .../20e-persistent-context-aws-nova-sonic.py | 4 +- examples/foundational/21-tavus-transport.py | 4 +- .../foundational/21a-tavus-video-service.py | 2 +- .../foundational/22-natural-conversation.py | 4 +- .../22b-natural-conversation-proposal.py | 4 +- .../22c-natural-conversation-mixed-llms.py | 4 +- .../22d-natural-conversation-gemini-audio.py | 4 +- 
.../foundational/23-bot-background-sound.py | 4 +- examples/foundational/24-stt-mute-filter.py | 4 +- examples/foundational/25-google-audio-in.py | 4 +- .../foundational/26-gemini-multimodal-live.py | 4 +- ...6a-gemini-multimodal-live-transcription.py | 4 +- ...gemini-multimodal-live-function-calling.py | 4 +- .../26c-gemini-multimodal-live-video.py | 2 +- .../26d-gemini-multimodal-live-text.py | 4 +- .../26e-gemini-multimodal-google-search.py | 4 +- .../26f-gemini-multimodal-live-files-api.py | 4 +- ...emini-multimodal-live-groundingMetadata.py | 4 +- examples/foundational/27-simli-layer.py | 2 +- .../28-transcription-processor.py | 4 +- .../foundational/29-turn-tracking-observer.py | 4 +- examples/foundational/30-observer.py | 4 +- .../32-gemini-grounding-metadata.py | 4 +- examples/foundational/33-gemini-rag.py | 4 +- examples/foundational/34-audio-recording.py | 4 +- .../35-pattern-pair-voice-switching.py | 4 +- .../foundational/36-user-email-gathering.py | 4 +- examples/foundational/37-mem0.py | 4 +- examples/foundational/38-smart-turn-fal.py | 4 +- .../38a-smart-turn-local-coreml.py | 4 +- examples/foundational/38b-smart-turn-local.py | 4 +- examples/foundational/39-mcp-stdio.py | 2 +- examples/foundational/39a-mcp-run-sse.py | 4 +- examples/foundational/39b-multiple-mcp.py | 2 +- examples/foundational/39c-mcp-run-http.py | 4 +- examples/foundational/40-aws-nova-sonic.py | 4 +- .../foundational/42-interruption-config.py | 4 +- .../foundational/43a-heygen-video-service.py | 2 +- .../foundational/44-voicemail-detection.py | 4 +- examples/quickstart/bot.py | 2 +- scripts/evals/eval.py | 2 +- src/pipecat/runner/daily.py | 2 +- src/pipecat/runner/run.py | 2 +- src/pipecat/runner/utils.py | 14 +- src/pipecat/services/tavus/video.py | 2 +- src/pipecat/transports/daily/__init__.py | 0 src/pipecat/transports/daily/transport.py | 2338 +++++++++++++++++ src/pipecat/transports/daily/utils.py | 410 +++ src/pipecat/transports/livekit/__init__.py | 0 
src/pipecat/transports/livekit/transport.py | 988 +++++++ .../transports/network/fastapi_websocket.py | 540 +--- .../transports/network/small_webrtc.py | 934 +------ .../transports/network/webrtc_connection.py | 605 +---- .../transports/network/websocket_client.py | 493 +--- .../transports/network/websocket_server.py | 499 +--- src/pipecat/transports/services/daily.py | 2331 +--------------- .../transports/services/helpers/daily_rest.py | 411 +-- src/pipecat/transports/services/livekit.py | 987 +------ src/pipecat/transports/services/tavus.py | 769 +----- .../transports/smallwebrtc/__init__.py | 0 .../transports/smallwebrtc/connection.py | 612 +++++ .../transports/smallwebrtc/transport.py | 935 +++++++ src/pipecat/transports/tavus/__init__.py | 0 src/pipecat/transports/tavus/transport.py | 770 ++++++ src/pipecat/transports/websocket/__init__.py | 0 src/pipecat/transports/websocket/client.py | 494 ++++ src/pipecat/transports/websocket/fastapi.py | 547 ++++ src/pipecat/transports/websocket/server.py | 500 ++++ src/pipecat/transports/whatsapp/__init__.py | 5 - src/pipecat/transports/whatsapp/client.py | 4 +- 184 files changed, 7997 insertions(+), 7767 deletions(-) create mode 100644 src/pipecat/transports/daily/__init__.py create mode 100644 src/pipecat/transports/daily/transport.py create mode 100644 src/pipecat/transports/daily/utils.py create mode 100644 src/pipecat/transports/livekit/__init__.py create mode 100644 src/pipecat/transports/livekit/transport.py create mode 100644 src/pipecat/transports/smallwebrtc/__init__.py create mode 100644 src/pipecat/transports/smallwebrtc/connection.py create mode 100644 src/pipecat/transports/smallwebrtc/transport.py create mode 100644 src/pipecat/transports/tavus/__init__.py create mode 100644 src/pipecat/transports/tavus/transport.py create mode 100644 src/pipecat/transports/websocket/__init__.py create mode 100644 src/pipecat/transports/websocket/client.py create mode 100644 src/pipecat/transports/websocket/fastapi.py 
create mode 100644 src/pipecat/transports/websocket/server.py diff --git a/CHANGELOG.md b/CHANGELOG.md index af3969560..17cabd81c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated +- Transports have been re-organized. + + ``` + pipecat.transports.network.small_webrtc -> pipecat.transports.smallwebrtc.transport + pipecat.transports.network.webrtc_connection -> pipecat.transports.smallwebrtc.connection + pipecat.transports.network.websocket_client -> pipecat.transports.websocket.client + pipecat.transports.network.websocket_server -> pipecat.transports.websocket.server + pipecat.transports.network.fastapi_websocket -> pipecat.transports.websocket.fastapi + pipecat.transports.services.daily -> pipecat.transports.daily.transport + pipecat.transports.services.helpers.daily_rest -> pipecat.transports.daily.utils + pipecat.transports.services.livekit -> pipecat.transports.livekit.transport + pipecat.transports.services.tavus -> pipecat.transports.tavus.transport + ``` + - `pipecat.frames.frames.KeypadEntry` is deprecated use `pipecat.audio.dtmf.types.KeypadEntry` instead. 
diff --git a/examples/foundational/01-say-one-thing-piper.py b/examples/foundational/01-say-one-thing-piper.py index b16601067..c84b68b53 100644 --- a/examples/foundational/01-say-one-thing-piper.py +++ b/examples/foundational/01-say-one-thing-piper.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.piper.tts import PiperTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/01-say-one-thing-rime.py b/examples/foundational/01-say-one-thing-rime.py index 9e15c0179..864367f3c 100644 --- a/examples/foundational/01-say-one-thing-rime.py +++ b/examples/foundational/01-say-one-thing-rime.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.rime.tts import RimeHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/01-say-one-thing.py b/examples/foundational/01-say-one-thing.py index 1eb89149f..0406ba28d 100644 --- a/examples/foundational/01-say-one-thing.py +++ b/examples/foundational/01-say-one-thing.py @@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import 
CartesiaTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/01b-livekit-audio.py b/examples/foundational/01b-livekit-audio.py index 75b73f280..24fa5a8d7 100644 --- a/examples/foundational/01b-livekit-audio.py +++ b/examples/foundational/01b-livekit-audio.py @@ -17,7 +17,7 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.runner.livekit import configure from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport +from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport load_dotenv(override=True) diff --git a/examples/foundational/01c-fastpitch.py b/examples/foundational/01c-fastpitch.py index 55c8835a4..3a239a3fd 100644 --- a/examples/foundational/01c-fastpitch.py +++ b/examples/foundational/01c-fastpitch.py @@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.riva.tts import FastPitchTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/02-llm-say-one-thing.py b/examples/foundational/02-llm-say-one-thing.py index 1282f3788..d04ff6df7 100644 --- 
a/examples/foundational/02-llm-say-one-thing.py +++ b/examples/foundational/02-llm-say-one-thing.py @@ -22,8 +22,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/03-still-frame.py b/examples/foundational/03-still-frame.py index 6b821ccb2..4769d5e1b 100644 --- a/examples/foundational/03-still-frame.py +++ b/examples/foundational/03-still-frame.py @@ -18,7 +18,7 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.fal.image import FalImageGenService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/03b-still-frame-imagen.py b/examples/foundational/03b-still-frame-imagen.py index 8e81f0dc8..409b2202a 100644 --- a/examples/foundational/03b-still-frame-imagen.py +++ b/examples/foundational/03b-still-frame-imagen.py @@ -17,7 +17,7 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.google.image import GoogleImageGenService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git 
a/examples/foundational/04-transports-small-webrtc.py b/examples/foundational/04-transports-small-webrtc.py index 9d53f03df..027ae1e8c 100644 --- a/examples/foundational/04-transports-small-webrtc.py +++ b/examples/foundational/04-transports-small-webrtc.py @@ -27,8 +27,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import TransportParams -from pipecat.transports.network.small_webrtc import SmallWebRTCTransport -from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection +from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection +from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport load_dotenv(override=True) diff --git a/examples/foundational/04a-transports-daily.py b/examples/foundational/04a-transports-daily.py index 8557b4a5f..64ddf04c9 100644 --- a/examples/foundational/04a-transports-daily.py +++ b/examples/foundational/04a-transports-daily.py @@ -21,7 +21,7 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.runner.daily import configure from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.services.daily import DailyLogLevel, DailyParams, DailyTransport +from pipecat.transports.daily.transport import DailyLogLevel, DailyParams, DailyTransport load_dotenv(override=True) diff --git a/examples/foundational/04b-transports-livekit.py b/examples/foundational/04b-transports-livekit.py index cddb68b90..5a80d149f 100644 --- a/examples/foundational/04b-transports-livekit.py +++ b/examples/foundational/04b-transports-livekit.py @@ -28,7 +28,7 @@ from pipecat.runner.livekit import configure from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt 
import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport +from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport load_dotenv(override=True) diff --git a/examples/foundational/05-sync-speech-and-image.py b/examples/foundational/05-sync-speech-and-image.py index 384ef617c..c4968245a 100644 --- a/examples/foundational/05-sync-speech-and-image.py +++ b/examples/foundational/05-sync-speech-and-image.py @@ -33,7 +33,7 @@ from pipecat.services.cartesia.tts import CartesiaHttpTTSService from pipecat.services.fal.image import FalImageGenService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/06-listen-and-respond.py b/examples/foundational/06-listen-and-respond.py index 7791f7ba0..7e6e642ef 100644 --- a/examples/foundational/06-listen-and-respond.py +++ b/examples/foundational/06-listen-and-respond.py @@ -28,8 +28,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/06a-image-sync.py b/examples/foundational/06a-image-sync.py index a2cf0e5ab..21998fbf2 100644 --- a/examples/foundational/06a-image-sync.py +++ b/examples/foundational/06a-image-sync.py 
@@ -29,7 +29,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/07-interruptible-cartesia-http.py b/examples/foundational/07-interruptible-cartesia-http.py index dbcd558b9..fa7615643 100644 --- a/examples/foundational/07-interruptible-cartesia-http.py +++ b/examples/foundational/07-interruptible-cartesia-http.py @@ -21,8 +21,8 @@ from pipecat.services.cartesia.tts import CartesiaHttpTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index 60ad96d56..86f47ebdf 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -21,8 +21,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07a-interruptible-speechmatics-vad.py b/examples/foundational/07a-interruptible-speechmatics-vad.py index f5bd59bf0..09777c9b7 100644 --- a/examples/foundational/07a-interruptible-speechmatics-vad.py +++ b/examples/foundational/07a-interruptible-speechmatics-vad.py @@ -25,8 +25,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.speechmatics.stt import SpeechmaticsSTTService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07a-interruptible-speechmatics.py b/examples/foundational/07a-interruptible-speechmatics.py index edfd03446..31e82c3c9 100644 --- a/examples/foundational/07a-interruptible-speechmatics.py +++ b/examples/foundational/07a-interruptible-speechmatics.py @@ -26,8 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.speechmatics.stt import SpeechmaticsSTTService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07aa-interruptible-soniox.py b/examples/foundational/07aa-interruptible-soniox.py index 293fb8d65..3ef2caf59 100644 --- 
a/examples/foundational/07aa-interruptible-soniox.py +++ b/examples/foundational/07aa-interruptible-soniox.py @@ -22,8 +22,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.soniox.stt import SonioxSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07ab-interruptible-inworld-http.py b/examples/foundational/07ab-interruptible-inworld-http.py index 255ccab9e..60666bbf6 100644 --- a/examples/foundational/07ab-interruptible-inworld-http.py +++ b/examples/foundational/07ab-interruptible-inworld-http.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.inworld.tts import InworldTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07ac-interruptible-asyncai-http.py b/examples/foundational/07ac-interruptible-asyncai-http.py index f1cb79ee7..eb949a171 100644 --- a/examples/foundational/07ac-interruptible-asyncai-http.py +++ b/examples/foundational/07ac-interruptible-asyncai-http.py @@ -23,8 +23,8 @@ from pipecat.services.asyncai.tts import AsyncAIHttpTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from 
pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07ac-interruptible-asyncai.py b/examples/foundational/07ac-interruptible-asyncai.py index f4d9221a4..98e6c5ccb 100644 --- a/examples/foundational/07ac-interruptible-asyncai.py +++ b/examples/foundational/07ac-interruptible-asyncai.py @@ -22,8 +22,8 @@ from pipecat.services.asyncai.tts import AsyncAITTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07ad-interruptible-aicoustics.py b/examples/foundational/07ad-interruptible-aicoustics.py index 47fcc19d1..49b626054 100644 --- a/examples/foundational/07ad-interruptible-aicoustics.py +++ b/examples/foundational/07ad-interruptible-aicoustics.py @@ -26,8 +26,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from 
pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07b-interruptible-langchain.py b/examples/foundational/07b-interruptible-langchain.py index 4afe73315..c6f6e18a8 100644 --- a/examples/foundational/07b-interruptible-langchain.py +++ b/examples/foundational/07b-interruptible-langchain.py @@ -33,8 +33,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07c-interruptible-deepgram-vad.py b/examples/foundational/07c-interruptible-deepgram-vad.py index 302229655..3569fc440 100644 --- a/examples/foundational/07c-interruptible-deepgram-vad.py +++ b/examples/foundational/07c-interruptible-deepgram-vad.py @@ -27,8 +27,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py index 01177954b..ff78e24f5 
100644 --- a/examples/foundational/07c-interruptible-deepgram.py +++ b/examples/foundational/07c-interruptible-deepgram.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07d-interruptible-elevenlabs-http.py b/examples/foundational/07d-interruptible-elevenlabs-http.py index 9d5fe1a11..44a680a64 100644 --- a/examples/foundational/07d-interruptible-elevenlabs-http.py +++ b/examples/foundational/07d-interruptible-elevenlabs-http.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07d-interruptible-elevenlabs.py b/examples/foundational/07d-interruptible-elevenlabs.py index 1c9612ed6..71c95ca7b 100644 --- a/examples/foundational/07d-interruptible-elevenlabs.py +++ b/examples/foundational/07d-interruptible-elevenlabs.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.elevenlabs.tts import ElevenLabsTTSService 
from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/07e-interruptible-playht-http.py index d1e1cc083..f28ed2a3a 100644 --- a/examples/foundational/07e-interruptible-playht-http.py +++ b/examples/foundational/07e-interruptible-playht-http.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.playht.tts import PlayHTHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07e-interruptible-playht.py b/examples/foundational/07e-interruptible-playht.py index be9d5e38a..ec450c285 100644 --- a/examples/foundational/07e-interruptible-playht.py +++ b/examples/foundational/07e-interruptible-playht.py @@ -23,8 +23,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.playht.tts import PlayHTTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import 
DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07f-interruptible-azure.py b/examples/foundational/07f-interruptible-azure.py index b7ebab18f..d87a46984 100644 --- a/examples/foundational/07f-interruptible-azure.py +++ b/examples/foundational/07f-interruptible-azure.py @@ -22,8 +22,8 @@ from pipecat.services.azure.llm import AzureLLMService from pipecat.services.azure.stt import AzureSTTService from pipecat.services.azure.tts import AzureTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07g-interruptible-openai.py b/examples/foundational/07g-interruptible-openai.py index 9f44df3b7..9c67e22e3 100644 --- a/examples/foundational/07g-interruptible-openai.py +++ b/examples/foundational/07g-interruptible-openai.py @@ -22,8 +22,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.openai.stt import OpenAISTTService from pipecat.services.openai.tts import OpenAITTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07h-interruptible-openpipe.py b/examples/foundational/07h-interruptible-openpipe.py index f93184141..fccf1db5b 100644 --- a/examples/foundational/07h-interruptible-openpipe.py +++ 
b/examples/foundational/07h-interruptible-openpipe.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openpipe.llm import OpenPipeLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07i-interruptible-xtts.py b/examples/foundational/07i-interruptible-xtts.py index 6da7576ab..999b78fb3 100644 --- a/examples/foundational/07i-interruptible-xtts.py +++ b/examples/foundational/07i-interruptible-xtts.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.xtts.tts import XTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07j-interruptible-gladia.py b/examples/foundational/07j-interruptible-gladia.py index 6c9258f18..684d80abe 100644 --- a/examples/foundational/07j-interruptible-gladia.py +++ b/examples/foundational/07j-interruptible-gladia.py @@ -24,8 +24,8 @@ from pipecat.services.gladia.stt import GladiaSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07k-interruptible-lmnt.py b/examples/foundational/07k-interruptible-lmnt.py index 9aecfb667..1d13715b6 100644 --- a/examples/foundational/07k-interruptible-lmnt.py +++ b/examples/foundational/07k-interruptible-lmnt.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.lmnt.tts import LmntTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07l-interruptible-groq.py b/examples/foundational/07l-interruptible-groq.py index 99aca0443..b6c242284 100644 --- a/examples/foundational/07l-interruptible-groq.py +++ b/examples/foundational/07l-interruptible-groq.py @@ -23,8 +23,8 @@ from pipecat.services.groq.llm import GroqLLMService from pipecat.services.groq.stt import GroqSTTService from pipecat.services.groq.tts import GroqTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07m-interruptible-aws.py 
b/examples/foundational/07m-interruptible-aws.py index aca20a0c1..ef1d14d53 100644 --- a/examples/foundational/07m-interruptible-aws.py +++ b/examples/foundational/07m-interruptible-aws.py @@ -20,8 +20,8 @@ from pipecat.services.aws.llm import AWSBedrockLLMService from pipecat.services.aws.stt import AWSTranscribeSTTService from pipecat.services.aws.tts import AWSPollyTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07n-interruptible-gemini.py b/examples/foundational/07n-interruptible-gemini.py index 6e4cb10a2..588831db3 100644 --- a/examples/foundational/07n-interruptible-gemini.py +++ b/examples/foundational/07n-interruptible-gemini.py @@ -41,8 +41,8 @@ from pipecat.services.google.stt import GoogleSTTService from pipecat.services.google.tts import GeminiTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07n-interruptible-google.py b/examples/foundational/07n-interruptible-google.py index 771a0ce98..930f965a6 100644 --- a/examples/foundational/07n-interruptible-google.py +++ b/examples/foundational/07n-interruptible-google.py @@ -23,8 +23,8 @@ from pipecat.services.google.stt import GoogleSTTService from pipecat.services.google.tts import GoogleTTSService from 
pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07o-interruptible-assemblyai.py b/examples/foundational/07o-interruptible-assemblyai.py index ce45d79eb..c482f185a 100644 --- a/examples/foundational/07o-interruptible-assemblyai.py +++ b/examples/foundational/07o-interruptible-assemblyai.py @@ -22,8 +22,8 @@ from pipecat.services.assemblyai.stt import AssemblyAISTTService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07p-interruptible-krisp.py b/examples/foundational/07p-interruptible-krisp.py index 2c18c82e1..34dfe9f87 100644 --- a/examples/foundational/07p-interruptible-krisp.py +++ b/examples/foundational/07p-interruptible-krisp.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import 
DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07q-interruptible-rime-http.py b/examples/foundational/07q-interruptible-rime-http.py index 40be98cd1..c25a696a9 100644 --- a/examples/foundational/07q-interruptible-rime-http.py +++ b/examples/foundational/07q-interruptible-rime-http.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.rime.tts import RimeHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07q-interruptible-rime.py b/examples/foundational/07q-interruptible-rime.py index a21dca215..70d124888 100644 --- a/examples/foundational/07q-interruptible-rime.py +++ b/examples/foundational/07q-interruptible-rime.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.rime.tts import RimeTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07r-interruptible-riva-nim.py b/examples/foundational/07r-interruptible-riva-nim.py index fec903fbb..700717efa 100644 --- a/examples/foundational/07r-interruptible-riva-nim.py +++ 
b/examples/foundational/07r-interruptible-riva-nim.py @@ -22,8 +22,8 @@ from pipecat.services.nim.llm import NimLLMService from pipecat.services.riva.stt import RivaSTTService from pipecat.services.riva.tts import RivaTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py index 582c8a7ed..cc317bc9b 100644 --- a/examples/foundational/07s-interruptible-google-audio-in.py +++ b/examples/foundational/07s-interruptible-google-audio-in.py @@ -36,8 +36,8 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.google.tts import GoogleTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07t-interruptible-fish.py b/examples/foundational/07t-interruptible-fish.py index 3396185d5..748116997 100644 --- a/examples/foundational/07t-interruptible-fish.py +++ b/examples/foundational/07t-interruptible-fish.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.fish.tts import FishAudioTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07u-interruptible-ultravox.py b/examples/foundational/07u-interruptible-ultravox.py index f561725bc..d603a4173 100644 --- a/examples/foundational/07u-interruptible-ultravox.py +++ b/examples/foundational/07u-interruptible-ultravox.py @@ -19,8 +19,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.ultravox.stt import UltravoxSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07v-interruptible-neuphonic-http.py b/examples/foundational/07v-interruptible-neuphonic-http.py index 870adad73..ce8b12143 100644 --- a/examples/foundational/07v-interruptible-neuphonic-http.py +++ b/examples/foundational/07v-interruptible-neuphonic-http.py @@ -23,8 +23,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams 
load_dotenv(override=True) diff --git a/examples/foundational/07v-interruptible-neuphonic.py b/examples/foundational/07v-interruptible-neuphonic.py index 12ad2be53..aa335ca86 100644 --- a/examples/foundational/07v-interruptible-neuphonic.py +++ b/examples/foundational/07v-interruptible-neuphonic.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.neuphonic.tts import NeuphonicTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07w-interruptible-fal.py b/examples/foundational/07w-interruptible-fal.py index 29d107210..3254b29d6 100644 --- a/examples/foundational/07w-interruptible-fal.py +++ b/examples/foundational/07w-interruptible-fal.py @@ -22,8 +22,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.fal.stt import FalSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07y-interruptible-minimax.py b/examples/foundational/07y-interruptible-minimax.py index a0b7d33a8..6d6dc061a 100644 --- a/examples/foundational/07y-interruptible-minimax.py +++ b/examples/foundational/07y-interruptible-minimax.py @@ -24,8 +24,8 @@ from 
pipecat.services.minimax.tts import MiniMaxHttpTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07z-interruptible-sarvam-http.py b/examples/foundational/07z-interruptible-sarvam-http.py index e75ef7909..ec2cc0486 100644 --- a/examples/foundational/07z-interruptible-sarvam-http.py +++ b/examples/foundational/07z-interruptible-sarvam-http.py @@ -23,8 +23,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.sarvam.tts import SarvamHttpTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/07z-interruptible-sarvam.py b/examples/foundational/07z-interruptible-sarvam.py index 67a386e4f..dd15131ac 100644 --- a/examples/foundational/07z-interruptible-sarvam.py +++ b/examples/foundational/07z-interruptible-sarvam.py @@ -24,8 +24,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.sarvam.tts import SarvamTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import 
FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/08-bots-arguing.py b/examples/foundational/08-bots-arguing.py index a38bc273a..a5d1b98f6 100644 --- a/examples/foundational/08-bots-arguing.py +++ b/examples/foundational/08-bots-arguing.py @@ -17,7 +17,7 @@ from pipecat.runner.daily import configure from pipecat.services.azure import AzureLLMService, AzureTTSService from pipecat.services.elevenlabs import ElevenLabsTTSService from pipecat.services.fal import FalImageGenService -from pipecat.transports.services.daily import DailyTransport +from pipecat.transports.daily.transport import DailyTransport load_dotenv(override=True) diff --git a/examples/foundational/09-mirror.py b/examples/foundational/09-mirror.py index d2f292912..b53851644 100644 --- a/examples/foundational/09-mirror.py +++ b/examples/foundational/09-mirror.py @@ -22,7 +22,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/09a-local-mirror.py b/examples/foundational/09a-local-mirror.py index a854b248b..f7f634e08 100644 --- a/examples/foundational/09a-local-mirror.py +++ b/examples/foundational/09a-local-mirror.py @@ -24,8 +24,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport, maybe_capture_participant_camera from pipecat.transports.base_transport import BaseTransport, 
TransportParams +from pipecat.transports.daily.transport import DailyParams from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams -from pipecat.transports.services.daily import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/10-wake-phrase.py b/examples/foundational/10-wake-phrase.py index 7f4a3fe2b..d318b3536 100644 --- a/examples/foundational/10-wake-phrase.py +++ b/examples/foundational/10-wake-phrase.py @@ -22,8 +22,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/11-sound-effects.py b/examples/foundational/11-sound-effects.py index f2dabece9..70104190f 100644 --- a/examples/foundational/11-sound-effects.py +++ b/examples/foundational/11-sound-effects.py @@ -32,8 +32,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/12-describe-video.py b/examples/foundational/12-describe-video.py index fad52c2de..d221aea21 100644 --- 
a/examples/foundational/12-describe-video.py +++ b/examples/foundational/12-describe-video.py @@ -28,7 +28,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.moondream.vision import MoondreamService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/12a-describe-video-gemini-flash.py b/examples/foundational/12a-describe-video-gemini-flash.py index 090dd6db6..0ad511e27 100644 --- a/examples/foundational/12a-describe-video-gemini-flash.py +++ b/examples/foundational/12a-describe-video-gemini-flash.py @@ -28,7 +28,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/12b-describe-video-gpt-4o.py b/examples/foundational/12b-describe-video-gpt-4o.py index f495801c7..0451a0341 100644 --- a/examples/foundational/12b-describe-video-gpt-4o.py +++ b/examples/foundational/12b-describe-video-gpt-4o.py @@ -28,7 +28,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/12c-describe-video-anthropic.py 
b/examples/foundational/12c-describe-video-anthropic.py index cc48d1567..54ebc20bf 100644 --- a/examples/foundational/12c-describe-video-anthropic.py +++ b/examples/foundational/12c-describe-video-anthropic.py @@ -28,7 +28,7 @@ from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/13-whisper-transcription.py b/examples/foundational/13-whisper-transcription.py index d966bc6f3..accd88c5f 100644 --- a/examples/foundational/13-whisper-transcription.py +++ b/examples/foundational/13-whisper-transcription.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.whisper.stt import WhisperSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13b-deepgram-transcription.py b/examples/foundational/13b-deepgram-transcription.py index c029c20b0..91e271acb 100644 --- a/examples/foundational/13b-deepgram-transcription.py +++ b/examples/foundational/13b-deepgram-transcription.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions from pipecat.transports.base_transport import BaseTransport, TransportParams 
-from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13c-gladia-transcription.py b/examples/foundational/13c-gladia-transcription.py index 9e856d80a..9b6dc9c8e 100644 --- a/examples/foundational/13c-gladia-transcription.py +++ b/examples/foundational/13c-gladia-transcription.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.gladia import GladiaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13c-gladia-translation.py b/examples/foundational/13c-gladia-translation.py index 2c686239c..50ebe9870 100644 --- a/examples/foundational/13c-gladia-translation.py +++ b/examples/foundational/13c-gladia-translation.py @@ -25,8 +25,8 @@ from pipecat.services.gladia.config import ( from pipecat.services.gladia.stt import GladiaSTTService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13d-assemblyai-transcription.py 
b/examples/foundational/13d-assemblyai-transcription.py index ea4578762..4fb73efff 100644 --- a/examples/foundational/13d-assemblyai-transcription.py +++ b/examples/foundational/13d-assemblyai-transcription.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.assemblyai.stt import AssemblyAISTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13e-whisper-mlx.py b/examples/foundational/13e-whisper-mlx.py index f7eee6e16..6756b9b9a 100644 --- a/examples/foundational/13e-whisper-mlx.py +++ b/examples/foundational/13e-whisper-mlx.py @@ -20,8 +20,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13f-cartesia-transcription.py b/examples/foundational/13f-cartesia-transcription.py index cce44d25e..6b0e22408 100644 --- a/examples/foundational/13f-cartesia-transcription.py +++ b/examples/foundational/13f-cartesia-transcription.py @@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.stt import 
CartesiaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13g-sambanova-transcription.py b/examples/foundational/13g-sambanova-transcription.py index 51cb2b937..c7e29c788 100644 --- a/examples/foundational/13g-sambanova-transcription.py +++ b/examples/foundational/13g-sambanova-transcription.py @@ -21,8 +21,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.sambanova.stt import SambaNovaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13h-speechmatics-transcription.py b/examples/foundational/13h-speechmatics-transcription.py index f84a76ed4..13c127e9c 100644 --- a/examples/foundational/13h-speechmatics-transcription.py +++ b/examples/foundational/13h-speechmatics-transcription.py @@ -19,8 +19,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.speechmatics.stt import SpeechmaticsSTTService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13i-soniox-transcription.py b/examples/foundational/13i-soniox-transcription.py index e6db08f98..2b0a4355c 100644 --- a/examples/foundational/13i-soniox-transcription.py +++ b/examples/foundational/13i-soniox-transcription.py @@ -19,8 +19,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.soniox.stt import SonioxSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/13j-azure-transcription.py b/examples/foundational/13j-azure-transcription.py index 66834a30b..f6f9f6348 100644 --- a/examples/foundational/13j-azure-transcription.py +++ b/examples/foundational/13j-azure-transcription.py @@ -19,8 +19,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.azure.stt import AzureSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14-function-calling.py b/examples/foundational/14-function-calling.py index 9fef69e9e..d338d9caa 100644 --- a/examples/foundational/14-function-calling.py +++ b/examples/foundational/14-function-calling.py @@ -24,8 +24,8 @@ from 
pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14a-function-calling-anthropic.py b/examples/foundational/14a-function-calling-anthropic.py index 5be4fac31..4e795e5f7 100644 --- a/examples/foundational/14a-function-calling-anthropic.py +++ b/examples/foundational/14a-function-calling-anthropic.py @@ -25,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14b-function-calling-anthropic-video.py b/examples/foundational/14b-function-calling-anthropic-video.py index 2f19cc3c2..fee1a9574 100644 --- a/examples/foundational/14b-function-calling-anthropic-video.py +++ b/examples/foundational/14b-function-calling-anthropic-video.py @@ -30,7 +30,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams 
-from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/14c-function-calling-together.py b/examples/foundational/14c-function-calling-together.py index 49add367c..bd2e9931c 100644 --- a/examples/foundational/14c-function-calling-together.py +++ b/examples/foundational/14c-function-calling-together.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.together.llm import TogetherLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14d-function-calling-video.py b/examples/foundational/14d-function-calling-video.py index d6f45c1a8..dd3b9144a 100644 --- a/examples/foundational/14d-function-calling-video.py +++ b/examples/foundational/14d-function-calling-video.py @@ -30,7 +30,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/14e-function-calling-google.py b/examples/foundational/14e-function-calling-google.py index 209a9e654..6fef65a53 100644 --- a/examples/foundational/14e-function-calling-google.py +++ b/examples/foundational/14e-function-calling-google.py @@ -30,7 +30,7 @@ from 
pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/14f-function-calling-groq.py b/examples/foundational/14f-function-calling-groq.py index 77c56c986..829956680 100644 --- a/examples/foundational/14f-function-calling-groq.py +++ b/examples/foundational/14f-function-calling-groq.py @@ -26,8 +26,8 @@ from pipecat.services.groq.llm import GroqLLMService from pipecat.services.groq.stt import GroqSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14g-function-calling-grok.py b/examples/foundational/14g-function-calling-grok.py index b18c4a884..29a633ee0 100644 --- a/examples/foundational/14g-function-calling-grok.py +++ b/examples/foundational/14g-function-calling-grok.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.grok.llm import GrokLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi 
import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14h-function-calling-azure.py b/examples/foundational/14h-function-calling-azure.py index 74a22f564..a41343cce 100644 --- a/examples/foundational/14h-function-calling-azure.py +++ b/examples/foundational/14h-function-calling-azure.py @@ -25,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14i-function-calling-fireworks.py b/examples/foundational/14i-function-calling-fireworks.py index 4adc65525..95ddf5b00 100644 --- a/examples/foundational/14i-function-calling-fireworks.py +++ b/examples/foundational/14i-function-calling-fireworks.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.fireworks.llm import FireworksLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14j-function-calling-nim.py b/examples/foundational/14j-function-calling-nim.py index 5c09b1e67..2b455a271 100644 --- a/examples/foundational/14j-function-calling-nim.py +++ 
b/examples/foundational/14j-function-calling-nim.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.nim.llm import NimLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14k-function-calling-cerebras.py b/examples/foundational/14k-function-calling-cerebras.py index b96a979c5..4317e20a0 100644 --- a/examples/foundational/14k-function-calling-cerebras.py +++ b/examples/foundational/14k-function-calling-cerebras.py @@ -25,8 +25,8 @@ from pipecat.services.cerebras.llm import CerebrasLLMService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14l-function-calling-deepseek.py b/examples/foundational/14l-function-calling-deepseek.py index cbf9c3437..25f0d31c7 100644 --- a/examples/foundational/14l-function-calling-deepseek.py +++ b/examples/foundational/14l-function-calling-deepseek.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepseek.llm import DeepSeekLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport 
import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14m-function-calling-openrouter.py b/examples/foundational/14m-function-calling-openrouter.py index 0fb5e1e8d..7b07fde48 100644 --- a/examples/foundational/14m-function-calling-openrouter.py +++ b/examples/foundational/14m-function-calling-openrouter.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openrouter.llm import OpenRouterLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py index bb5132118..edf014e08 100644 --- a/examples/foundational/14n-function-calling-perplexity.py +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -28,8 +28,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.perplexity.llm import PerplexityLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py index 4c97becfb..d64738226 100644 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ b/examples/foundational/14o-function-calling-gemini-openai-format.py @@ -25,8 +25,8 @@ from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.google.llm_openai import GoogleLLMOpenAIBetaService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14p-function-calling-gemini-vertex-ai.py b/examples/foundational/14p-function-calling-gemini-vertex-ai.py index 7144059c1..bd9518be2 100644 --- a/examples/foundational/14p-function-calling-gemini-vertex-ai.py +++ b/examples/foundational/14p-function-calling-gemini-vertex-ai.py @@ -25,8 +25,8 @@ from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.google.llm_vertex import GoogleVertexLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14q-function-calling-qwen.py 
b/examples/foundational/14q-function-calling-qwen.py index 305d28e15..a0beed109 100644 --- a/examples/foundational/14q-function-calling-qwen.py +++ b/examples/foundational/14q-function-calling-qwen.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.qwen.llm import QwenLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14r-function-calling-aws.py b/examples/foundational/14r-function-calling-aws.py index 73b684e86..795896c1a 100644 --- a/examples/foundational/14r-function-calling-aws.py +++ b/examples/foundational/14r-function-calling-aws.py @@ -23,8 +23,8 @@ from pipecat.services.aws.stt import AWSTranscribeSTTService from pipecat.services.aws.tts import AWSPollyTTSService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index 33c775ddf..435f00b84 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -26,8 +26,8 @@ from pipecat.services.llm_service import FunctionCallParams from 
pipecat.services.sambanova.llm import SambaNovaLLMService from pipecat.services.sambanova.stt import SambaNovaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14t-function-calling-direct.py b/examples/foundational/14t-function-calling-direct.py index be8dfc835..1669c4422 100644 --- a/examples/foundational/14t-function-calling-direct.py +++ b/examples/foundational/14t-function-calling-direct.py @@ -24,8 +24,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14u-function-calling-ollama.py b/examples/foundational/14u-function-calling-ollama.py index 13c97d0f6..08eb48b5b 100644 --- a/examples/foundational/14u-function-calling-ollama.py +++ b/examples/foundational/14u-function-calling-ollama.py @@ -26,8 +26,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.services.ollama.llm import OLLamaLLMService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from 
pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14v-function-calling-openai.py b/examples/foundational/14v-function-calling-openai.py index 4c9aee885..3932b5508 100644 --- a/examples/foundational/14v-function-calling-openai.py +++ b/examples/foundational/14v-function-calling-openai.py @@ -24,8 +24,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.openai.stt import OpenAISTTService from pipecat.services.openai.tts import OpenAITTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14w-function-calling-mistral.py b/examples/foundational/14w-function-calling-mistral.py index 155fd9c28..f059b01e6 100644 --- a/examples/foundational/14w-function-calling-mistral.py +++ b/examples/foundational/14w-function-calling-mistral.py @@ -24,8 +24,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.mistral.llm import MistralLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14x-function-calling-universal-context.py 
b/examples/foundational/14x-function-calling-universal-context.py index 3998bbe5e..4d1871d6b 100644 --- a/examples/foundational/14x-function-calling-universal-context.py +++ b/examples/foundational/14x-function-calling-universal-context.py @@ -25,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/14y-function-calling-google-universal-context.py b/examples/foundational/14y-function-calling-google-universal-context.py index 43572899e..4652f71f4 100644 --- a/examples/foundational/14y-function-calling-google-universal-context.py +++ b/examples/foundational/14y-function-calling-google-universal-context.py @@ -31,7 +31,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/15-switch-voices.py b/examples/foundational/15-switch-voices.py index f44a07f95..a19d36dae 100644 --- a/examples/foundational/15-switch-voices.py +++ b/examples/foundational/15-switch-voices.py @@ -26,8 +26,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import 
OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/15a-switch-languages.py b/examples/foundational/15a-switch-languages.py index e34b88dfe..5750530a0 100644 --- a/examples/foundational/15a-switch-languages.py +++ b/examples/foundational/15a-switch-languages.py @@ -27,8 +27,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/16-gpu-container-local-bot.py b/examples/foundational/16-gpu-container-local-bot.py index a4367d4a9..1867e4295 100644 --- a/examples/foundational/16-gpu-container-local-bot.py +++ b/examples/foundational/16-gpu-container-local-bot.py @@ -22,8 +22,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams, DailyTransportMessageFrame +from pipecat.transports.daily.transport import DailyParams, 
DailyTransportMessageFrame +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/17-detect-user-idle.py b/examples/foundational/17-detect-user-idle.py index ed9490f08..2cccd079c 100644 --- a/examples/foundational/17-detect-user-idle.py +++ b/examples/foundational/17-detect-user-idle.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/18-gstreamer-filesrc.py b/examples/foundational/18-gstreamer-filesrc.py index 16ad83ebd..eb768c16f 100644 --- a/examples/foundational/18-gstreamer-filesrc.py +++ b/examples/foundational/18-gstreamer-filesrc.py @@ -16,7 +16,7 @@ from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/18a-gstreamer-videotestsrc.py b/examples/foundational/18a-gstreamer-videotestsrc.py index 4a8200045..db1a732b6 100644 --- a/examples/foundational/18a-gstreamer-videotestsrc.py +++ b/examples/foundational/18a-gstreamer-videotestsrc.py @@ -15,7 +15,7 @@ from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource from 
pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py index 9306c3303..333e7c053 100644 --- a/examples/foundational/19-openai-realtime-beta.py +++ b/examples/foundational/19-openai-realtime-beta.py @@ -31,8 +31,8 @@ from pipecat.services.openai_realtime_beta import ( SessionProperties, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/19a-azure-realtime-beta.py b/examples/foundational/19a-azure-realtime-beta.py index 1ee156710..9c463a98d 100644 --- a/examples/foundational/19a-azure-realtime-beta.py +++ b/examples/foundational/19a-azure-realtime-beta.py @@ -28,8 +28,8 @@ from pipecat.services.openai_realtime_beta import ( SessionProperties, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/19b-openai-realtime-beta-text.py b/examples/foundational/19b-openai-realtime-beta-text.py index c00e679df..63a699ae9 100644 --- a/examples/foundational/19b-openai-realtime-beta-text.py +++ 
b/examples/foundational/19b-openai-realtime-beta-text.py @@ -32,8 +32,8 @@ from pipecat.services.openai_realtime_beta import ( SessionProperties, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/20a-persistent-context-openai.py b/examples/foundational/20a-persistent-context-openai.py index 21bb9e128..dfc3c2ba5 100644 --- a/examples/foundational/20a-persistent-context-openai.py +++ b/examples/foundational/20a-persistent-context-openai.py @@ -28,8 +28,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/20b-persistent-context-openai-realtime.py b/examples/foundational/20b-persistent-context-openai-realtime.py index 247077df1..7aa75e4aa 100644 --- a/examples/foundational/20b-persistent-context-openai-realtime.py +++ b/examples/foundational/20b-persistent-context-openai-realtime.py @@ -32,8 +32,8 @@ from pipecat.services.openai_realtime_beta import ( TurnDetection, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams 
+from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/20c-persistent-context-anthropic.py b/examples/foundational/20c-persistent-context-anthropic.py index 4fb1c3a3b..2bcb60d09 100644 --- a/examples/foundational/20c-persistent-context-anthropic.py +++ b/examples/foundational/20c-persistent-context-anthropic.py @@ -28,8 +28,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/20d-persistent-context-gemini.py b/examples/foundational/20d-persistent-context-gemini.py index 358de6e4f..ccb44d3c0 100644 --- a/examples/foundational/20d-persistent-context-gemini.py +++ b/examples/foundational/20d-persistent-context-gemini.py @@ -32,7 +32,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/20e-persistent-context-aws-nova-sonic.py b/examples/foundational/20e-persistent-context-aws-nova-sonic.py index 112581f5c..bd3d9d545 100644 --- a/examples/foundational/20e-persistent-context-aws-nova-sonic.py +++ 
b/examples/foundational/20e-persistent-context-aws-nova-sonic.py @@ -26,8 +26,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.aws_nova_sonic.aws import AWSNovaSonicLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/21-tavus-transport.py b/examples/foundational/21-tavus-transport.py index 1818c0fcc..5b370be8d 100644 --- a/examples/foundational/21-tavus-transport.py +++ b/examples/foundational/21-tavus-transport.py @@ -18,12 +18,10 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService -from pipecat.transports.services.tavus import TavusParams, TavusTransport +from pipecat.transports.tavus.transport import TavusParams, TavusTransport load_dotenv(override=True) diff --git a/examples/foundational/21a-tavus-video-service.py b/examples/foundational/21a-tavus-video-service.py index 670015077..bc96ba8d5 100644 --- a/examples/foundational/21a-tavus-video-service.py +++ b/examples/foundational/21a-tavus-video-service.py @@ -24,7 +24,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from 
pipecat.services.tavus.video import TavusVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/22-natural-conversation.py b/examples/foundational/22-natural-conversation.py index 6f9c02967..4907aa137 100644 --- a/examples/foundational/22-natural-conversation.py +++ b/examples/foundational/22-natural-conversation.py @@ -29,8 +29,8 @@ from pipecat.services.llm_service import LLMService from pipecat.services.openai.llm import OpenAIContextAggregatorPair, OpenAILLMService from pipecat.sync.event_notifier import EventNotifier from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/22b-natural-conversation-proposal.py b/examples/foundational/22b-natural-conversation-proposal.py index a230b2c30..dc70d0379 100644 --- a/examples/foundational/22b-natural-conversation-proposal.py +++ b/examples/foundational/22b-natural-conversation-proposal.py @@ -48,8 +48,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams from pipecat.utils.time import time_now_iso8601 
load_dotenv(override=True) diff --git a/examples/foundational/22c-natural-conversation-mixed-llms.py b/examples/foundational/22c-natural-conversation-mixed-llms.py index 3a68227fd..44f3f1349 100644 --- a/examples/foundational/22c-natural-conversation-mixed-llms.py +++ b/examples/foundational/22c-natural-conversation-mixed-llms.py @@ -49,8 +49,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams from pipecat.utils.time import time_now_iso8601 load_dotenv(override=True) diff --git a/examples/foundational/22d-natural-conversation-gemini-audio.py b/examples/foundational/22d-natural-conversation-gemini-audio.py index 1e03c3d45..5eed49092 100644 --- a/examples/foundational/22d-natural-conversation-gemini-audio.py +++ b/examples/foundational/22d-natural-conversation-gemini-audio.py @@ -52,8 +52,8 @@ from pipecat.services.llm_service import LLMService from pipecat.sync.base_notifier import BaseNotifier from pipecat.sync.event_notifier import EventNotifier from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams from pipecat.utils.time import time_now_iso8601 load_dotenv(override=True) diff --git a/examples/foundational/23-bot-background-sound.py b/examples/foundational/23-bot-background-sound.py index eb03db232..0bfec0c6a 100644 
--- a/examples/foundational/23-bot-background-sound.py +++ b/examples/foundational/23-bot-background-sound.py @@ -24,8 +24,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/24-stt-mute-filter.py b/examples/foundational/24-stt-mute-filter.py index 9755c10ea..4dfa25c42 100644 --- a/examples/foundational/24-stt-mute-filter.py +++ b/examples/foundational/24-stt-mute-filter.py @@ -27,8 +27,8 @@ from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/25-google-audio-in.py b/examples/foundational/25-google-audio-in.py index d32742ba2..dd403c08a 100644 --- a/examples/foundational/25-google-audio-in.py +++ b/examples/foundational/25-google-audio-in.py @@ -37,8 +37,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.google.llm import GoogleLLMContext, GoogleLLMService from pipecat.transports.base_transport import BaseTransport, 
TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26-gemini-multimodal-live.py b/examples/foundational/26-gemini-multimodal-live.py index 65feb6d3c..b446a9b8c 100644 --- a/examples/foundational/26-gemini-multimodal-live.py +++ b/examples/foundational/26-gemini-multimodal-live.py @@ -19,8 +19,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams # Load environment variables load_dotenv(override=True) diff --git a/examples/foundational/26a-gemini-multimodal-live-transcription.py b/examples/foundational/26a-gemini-multimodal-live-transcription.py index 043178a1b..ad3cd06ee 100644 --- a/examples/foundational/26a-gemini-multimodal-live-transcription.py +++ b/examples/foundational/26a-gemini-multimodal-live-transcription.py @@ -22,8 +22,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import 
DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26b-gemini-multimodal-live-function-calling.py b/examples/foundational/26b-gemini-multimodal-live-function-calling.py index 16b62cdf3..f14713a5c 100644 --- a/examples/foundational/26b-gemini-multimodal-live-function-calling.py +++ b/examples/foundational/26b-gemini-multimodal-live-function-calling.py @@ -25,8 +25,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26c-gemini-multimodal-live-video.py b/examples/foundational/26c-gemini-multimodal-live-video.py index 54314038b..a28eaaacf 100644 --- a/examples/foundational/26c-gemini-multimodal-live-video.py +++ b/examples/foundational/26c-gemini-multimodal-live-video.py @@ -26,7 +26,7 @@ from pipecat.runner.utils import ( ) from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/26d-gemini-multimodal-live-text.py b/examples/foundational/26d-gemini-multimodal-live-text.py index 30b497784..667887b03 100644 --- a/examples/foundational/26d-gemini-multimodal-live-text.py +++ b/examples/foundational/26d-gemini-multimodal-live-text.py @@ 
-26,8 +26,8 @@ from pipecat.services.gemini_multimodal_live.gemini import ( InputParams, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26e-gemini-multimodal-google-search.py b/examples/foundational/26e-gemini-multimodal-google-search.py index 27439e159..31fad54fe 100644 --- a/examples/foundational/26e-gemini-multimodal-google-search.py +++ b/examples/foundational/26e-gemini-multimodal-google-search.py @@ -21,8 +21,8 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26f-gemini-multimodal-live-files-api.py b/examples/foundational/26f-gemini-multimodal-live-files-api.py index f7c7d9b47..b01c21803 100644 --- a/examples/foundational/26f-gemini-multimodal-live-files-api.py +++ b/examples/foundational/26f-gemini-multimodal-live-files-api.py @@ -23,8 +23,8 @@ from pipecat.services.gemini_multimodal_live.gemini import ( GeminiMultimodalLiveLLMService, ) from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams 
+from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py b/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py index 20de2a849..6cfeed51b 100644 --- a/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py +++ b/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py @@ -17,8 +17,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService from pipecat.services.google.frames import LLMSearchResponseFrame from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/27-simli-layer.py b/examples/foundational/27-simli-layer.py index 81030cfd8..2259ca1ee 100644 --- a/examples/foundational/27-simli-layer.py +++ b/examples/foundational/27-simli-layer.py @@ -24,7 +24,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.simli.video import SimliVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/28-transcription-processor.py b/examples/foundational/28-transcription-processor.py index fc592e9ea..fc961689d 100644 --- a/examples/foundational/28-transcription-processor.py +++ 
b/examples/foundational/28-transcription-processor.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index a3e0b8b3c..857e80d6f 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/30-observer.py b/examples/foundational/30-observer.py index c4eae71c3..34557ca56 100644 --- a/examples/foundational/30-observer.py +++ b/examples/foundational/30-observer.py @@ -38,8 +38,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_input import BaseInputTransport from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams -from 
pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/32-gemini-grounding-metadata.py b/examples/foundational/32-gemini-grounding-metadata.py index 585671a34..223f6e149 100644 --- a/examples/foundational/32-gemini-grounding-metadata.py +++ b/examples/foundational/32-gemini-grounding-metadata.py @@ -26,8 +26,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService, LLMSearchResponseFrame from pipecat.services.llm_service import LLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams sys.path.append(str(Path(__file__).parent.parent)) diff --git a/examples/foundational/33-gemini-rag.py b/examples/foundational/33-gemini-rag.py index 8dd383fc0..ad6ca2961 100644 --- a/examples/foundational/33-gemini-rag.py +++ b/examples/foundational/33-gemini-rag.py @@ -68,8 +68,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git 
a/examples/foundational/34-audio-recording.py b/examples/foundational/34-audio-recording.py index 654a40c17..4652629c9 100644 --- a/examples/foundational/34-audio-recording.py +++ b/examples/foundational/34-audio-recording.py @@ -63,8 +63,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/35-pattern-pair-voice-switching.py b/examples/foundational/35-pattern-pair-voice-switching.py index 76bd345b1..8dc6cccd3 100644 --- a/examples/foundational/35-pattern-pair-voice-switching.py +++ b/examples/foundational/35-pattern-pair-voice-switching.py @@ -56,8 +56,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams from pipecat.utils.text.pattern_pair_aggregator import PatternMatch, PatternPairAggregator load_dotenv(override=True) diff --git a/examples/foundational/36-user-email-gathering.py b/examples/foundational/36-user-email-gathering.py index f6047dcd7..65048bc96 100644 --- a/examples/foundational/36-user-email-gathering.py +++ 
b/examples/foundational/36-user-email-gathering.py @@ -25,8 +25,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.rime.tts import RimeHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/37-mem0.py b/examples/foundational/37-mem0.py index bfd9c48c3..371eb33ee 100644 --- a/examples/foundational/37-mem0.py +++ b/examples/foundational/37-mem0.py @@ -61,8 +61,8 @@ from pipecat.services.elevenlabs.tts import ElevenLabsTTSService from pipecat.services.mem0.memory import Mem0MemoryService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/38-smart-turn-fal.py b/examples/foundational/38-smart-turn-fal.py index 12a68dfe2..6de739b04 100644 --- a/examples/foundational/38-smart-turn-fal.py +++ b/examples/foundational/38-smart-turn-fal.py @@ -25,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from 
pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/38a-smart-turn-local-coreml.py b/examples/foundational/38a-smart-turn-local-coreml.py index 6e254f245..b7bf1c14b 100644 --- a/examples/foundational/38a-smart-turn-local-coreml.py +++ b/examples/foundational/38a-smart-turn-local-coreml.py @@ -25,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/38b-smart-turn-local.py b/examples/foundational/38b-smart-turn-local.py index 85fc94a0f..372811c39 100644 --- a/examples/foundational/38b-smart-turn-local.py +++ b/examples/foundational/38b-smart-turn-local.py @@ -25,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/39-mcp-stdio.py b/examples/foundational/39-mcp-stdio.py index f63559984..a287ebced 
100644 --- a/examples/foundational/39-mcp-stdio.py +++ b/examples/foundational/39-mcp-stdio.py @@ -35,7 +35,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/39a-mcp-run-sse.py b/examples/foundational/39a-mcp-run-sse.py index db2bac275..a929f7730 100644 --- a/examples/foundational/39a-mcp-run-sse.py +++ b/examples/foundational/39a-mcp-run-sse.py @@ -24,8 +24,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/39b-multiple-mcp.py b/examples/foundational/39b-multiple-mcp.py index c87eabf05..6cf93c379 100644 --- a/examples/foundational/39b-multiple-mcp.py +++ b/examples/foundational/39b-multiple-mcp.py @@ -38,7 +38,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/39c-mcp-run-http.py 
b/examples/foundational/39c-mcp-run-http.py index f3af94eb4..e487ce1e7 100644 --- a/examples/foundational/39c-mcp-run-http.py +++ b/examples/foundational/39c-mcp-run-http.py @@ -24,8 +24,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index 125ac73b5..de7bbf638 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -24,8 +24,8 @@ from pipecat.runner.utils import create_transport from pipecat.services.aws_nova_sonic import AWSNovaSonicLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams # Load environment variables load_dotenv(override=True) diff --git a/examples/foundational/42-interruption-config.py b/examples/foundational/42-interruption-config.py index 0c69c6bc2..f939598c5 100644 --- a/examples/foundational/42-interruption-config.py +++ b/examples/foundational/42-interruption-config.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm 
import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/foundational/43a-heygen-video-service.py b/examples/foundational/43a-heygen-video-service.py index 0e574446c..93c4f5ca6 100644 --- a/examples/foundational/43a-heygen-video-service.py +++ b/examples/foundational/43a-heygen-video-service.py @@ -23,7 +23,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.services.heygen.video import HeyGenVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams load_dotenv(override=True) diff --git a/examples/foundational/44-voicemail-detection.py b/examples/foundational/44-voicemail-detection.py index 1834346ba..45867f674 100644 --- a/examples/foundational/44-voicemail-detection.py +++ b/examples/foundational/44-voicemail-detection.py @@ -23,8 +23,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) diff --git a/examples/quickstart/bot.py b/examples/quickstart/bot.py index 
d0201a93a..5b2002abb 100644 --- a/examples/quickstart/bot.py +++ b/examples/quickstart/bot.py @@ -44,7 +44,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import DailyParams +from pipecat.transports.daily.transport import DailyParams logger.info("✅ All components loaded successfully!") diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py index d5c6516ac..a27b10aea 100644 --- a/scripts/evals/eval.py +++ b/scripts/evals/eval.py @@ -42,7 +42,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService -from pipecat.transports.services.daily import DailyParams, DailyTransport +from pipecat.transports.daily.transport import DailyParams, DailyTransport SCRIPT_DIR = Path(__file__).resolve().parent diff --git a/src/pipecat/runner/daily.py b/src/pipecat/runner/daily.py index 793b67e1c..0052184f9 100644 --- a/src/pipecat/runner/daily.py +++ b/src/pipecat/runner/daily.py @@ -43,7 +43,7 @@ import aiohttp from loguru import logger from pydantic import BaseModel -from pipecat.transports.services.helpers.daily_rest import ( +from pipecat.transports.daily.utils import ( DailyRESTHelper, DailyRoomParams, DailyRoomProperties, diff --git a/src/pipecat/runner/run.py b/src/pipecat/runner/run.py index a2bcc614f..0d03d8651 100644 --- a/src/pipecat/runner/run.py +++ b/src/pipecat/runner/run.py @@ -182,7 +182,7 @@ def _setup_webrtc_routes(app: FastAPI, esp32_mode: bool = False, host: str = "lo try: from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI - from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection + 
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection except ImportError as e: logger.error(f"WebRTC transport dependencies not installed: {e}") return diff --git a/src/pipecat/runner/utils.py b/src/pipecat/runner/utils.py index b638eb11e..585bd02b2 100644 --- a/src/pipecat/runner/utils.py +++ b/src/pipecat/runner/utils.py @@ -203,7 +203,7 @@ def get_transport_client_id(transport: BaseTransport, client: Any) -> str: """ # Import conditionally to avoid dependency issues try: - from pipecat.transports.network.small_webrtc import SmallWebRTCTransport + from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport if isinstance(transport, SmallWebRTCTransport): return client.pc_id @@ -211,7 +211,7 @@ def get_transport_client_id(transport: BaseTransport, client: Any) -> str: pass try: - from pipecat.transports.services.daily import DailyTransport + from pipecat.transports.daily.transport import DailyTransport if isinstance(transport, DailyTransport): return client["id"] @@ -233,7 +233,7 @@ async def maybe_capture_participant_camera( framerate: Video capture framerate. Defaults to 0 (auto). """ try: - from pipecat.transports.services.daily import DailyTransport + from pipecat.transports.daily.transport import DailyTransport if isinstance(transport, DailyTransport): await transport.capture_participant_video( @@ -254,7 +254,7 @@ async def maybe_capture_participant_screen( framerate: Video capture framerate. Defaults to 0 (auto). """ try: - from pipecat.transports.services.daily import DailyTransport + from pipecat.transports.daily.transport import DailyTransport if isinstance(transport, DailyTransport): await transport.capture_participant_video( @@ -359,7 +359,7 @@ async def _create_telephony_transport( Returns: Configured FastAPIWebsocketTransport ready for telephony use. 
""" - from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketTransport + from pipecat.transports.websocket.fastapi import FastAPIWebsocketTransport if params is None: raise ValueError( @@ -482,7 +482,7 @@ async def create_transport( if isinstance(runner_args, DailyRunnerArguments): params = _get_transport_params("daily", transport_params) - from pipecat.transports.services.daily import DailyTransport + from pipecat.transports.daily.transport import DailyTransport return DailyTransport( runner_args.room_url, @@ -494,7 +494,7 @@ async def create_transport( elif isinstance(runner_args, SmallWebRTCRunnerArguments): params = _get_transport_params("webrtc", transport_params) - from pipecat.transports.network.small_webrtc import SmallWebRTCTransport + from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport return SmallWebRTCTransport( params=params, diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py index 1ec76c02c..aff5778d9 100644 --- a/src/pipecat/services/tavus/video.py +++ b/src/pipecat/services/tavus/video.py @@ -34,7 +34,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup from pipecat.services.ai_service import AIService -from pipecat.transports.services.tavus import TavusCallbacks, TavusParams, TavusTransportClient +from pipecat.transports.tavus.transport import TavusCallbacks, TavusParams, TavusTransportClient class TavusVideoService(AIService): diff --git a/src/pipecat/transports/daily/__init__.py b/src/pipecat/transports/daily/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py new file mode 100644 index 000000000..cfc7998ef --- /dev/null +++ b/src/pipecat/transports/daily/transport.py @@ -0,0 +1,2338 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Daily 
transport implementation for Pipecat. + +This module provides comprehensive Daily video conferencing integration including +audio/video streaming, transcription, recording, dial-in/out functionality, and +real-time communication features. +""" + +import asyncio +import time +from concurrent.futures import CancelledError as FuturesCancelledError +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from typing import Any, Awaitable, Callable, Dict, Mapping, Optional + +import aiohttp +from loguru import logger +from pydantic import BaseModel + +from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams +from pipecat.frames.frames import ( + CancelFrame, + EndFrame, + ErrorFrame, + Frame, + InputAudioRawFrame, + InterimTranscriptionFrame, + OutputAudioRawFrame, + OutputImageRawFrame, + SpriteFrame, + StartFrame, + TranscriptionFrame, + TransportMessageFrame, + TransportMessageUrgentFrame, + UserAudioRawFrame, + UserImageRawFrame, + UserImageRequestFrame, +) +from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup +from pipecat.transcriptions.language import Language +from pipecat.transports.base_input import BaseInputTransport +from pipecat.transports.base_output import BaseOutputTransport +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.utils.asyncio.task_manager import BaseTaskManager + +try: + from daily import ( + AudioData, + CallClient, + CustomAudioSource, + CustomAudioTrack, + Daily, + EventHandler, + VideoFrame, + VirtualCameraDevice, + VirtualSpeakerDevice, + ) + from daily import ( + LogLevel as DailyLogLevel, + ) +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error( + "In order to use the Daily transport, you need to `pip install pipecat-ai[daily]`." 
+ ) + raise Exception(f"Missing module: {e}") + +VAD_RESET_PERIOD_MS = 2000 + + +@dataclass +class DailyTransportMessageFrame(TransportMessageFrame): + """Frame for transport messages in Daily calls. + + Parameters: + participant_id: Optional ID of the participant this message is for/from. + """ + + participant_id: Optional[str] = None + + +@dataclass +class DailyTransportMessageUrgentFrame(TransportMessageUrgentFrame): + """Frame for urgent transport messages in Daily calls. + + Parameters: + participant_id: Optional ID of the participant this message is for/from. + """ + + participant_id: Optional[str] = None + + +class WebRTCVADAnalyzer(VADAnalyzer): + """Voice Activity Detection analyzer using WebRTC. + + Implements voice activity detection using Daily's native WebRTC VAD. + """ + + def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None): + """Initialize the WebRTC VAD analyzer. + + Args: + sample_rate: Audio sample rate in Hz. + params: VAD configuration parameters. + """ + super().__init__(sample_rate=sample_rate, params=params) + + self._webrtc_vad = Daily.create_native_vad( + reset_period_ms=VAD_RESET_PERIOD_MS, sample_rate=self.sample_rate, channels=1 + ) + logger.debug("Loaded native WebRTC VAD") + + def num_frames_required(self) -> int: + """Get the number of audio frames required for VAD analysis. + + Returns: + The number of frames needed (equivalent to 10ms of audio). + """ + return int(self.sample_rate / 100.0) + + def voice_confidence(self, buffer) -> float: + """Analyze audio buffer and return voice confidence score. + + Args: + buffer: Audio buffer to analyze. + + Returns: + Voice confidence score between 0.0 and 1.0. + """ + confidence = 0 + if len(buffer) > 0: + confidence = self._webrtc_vad.analyze_frames(buffer) + return confidence + + +class DailyDialinSettings(BaseModel): + """Settings for Daily's dial-in functionality. 
+ + Parameters: + call_id: CallId is represented by UUID and represents the sessionId in the SIP Network. + call_domain: Call Domain is represented by UUID and represents your Daily Domain on the SIP Network. + """ + + call_id: str = "" + call_domain: str = "" + + +class DailyTranscriptionSettings(BaseModel): + """Configuration settings for Daily's transcription service. + + Parameters: + language: ISO language code for transcription (e.g. "en"). + model: Transcription model to use (e.g. "nova-2-general"). + profanity_filter: Whether to filter profanity from transcripts. + redact: Whether to redact sensitive information. + endpointing: Whether to use endpointing to determine speech segments. + punctuate: Whether to add punctuation to transcripts. + includeRawResponse: Whether to include raw response data. + extra: Additional parameters passed to the Deepgram transcription service. + """ + + language: str = "en" + model: str = "nova-2-general" + profanity_filter: bool = True + redact: bool = False + endpointing: bool = True + punctuate: bool = True + includeRawResponse: bool = True + extra: Mapping[str, Any] = {"interim_results": True} + + +class DailyParams(TransportParams): + """Configuration parameters for Daily transport. + + Parameters: + api_url: Daily API base URL. + api_key: Daily API authentication key. + audio_in_user_tracks: Receive users' audio in separate tracks + dialin_settings: Optional settings for dial-in functionality. + camera_out_enabled: Whether to enable the main camera output track. + microphone_out_enabled: Whether to enable the main microphone track. + transcription_enabled: Whether to enable speech transcription. + transcription_settings: Configuration for transcription service. 
+ """ + + api_url: str = "https://api.daily.co/v1" + api_key: str = "" + audio_in_user_tracks: bool = True + dialin_settings: Optional[DailyDialinSettings] = None + camera_out_enabled: bool = True + microphone_out_enabled: bool = True + transcription_enabled: bool = False + transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings() + + +class DailyCallbacks(BaseModel): + """Callback handlers for Daily events. + + Parameters: + on_active_speaker_changed: Called when the active speaker of the call has changed. + on_joined: Called when bot successfully joined a room. + on_left: Called when bot left a room. + on_error: Called when an error occurs. + on_app_message: Called when receiving an app message. + on_call_state_updated: Called when call state changes. + on_client_connected: Called when a client (participant) connects. + on_client_disconnected: Called when a client (participant) disconnects. + on_dialin_connected: Called when dial-in is connected. + on_dialin_ready: Called when dial-in is ready. + on_dialin_stopped: Called when dial-in is stopped. + on_dialin_error: Called when dial-in encounters an error. + on_dialin_warning: Called when dial-in has a warning. + on_dialout_answered: Called when dial-out is answered. + on_dialout_connected: Called when dial-out is connected. + on_dialout_stopped: Called when dial-out is stopped. + on_dialout_error: Called when dial-out encounters an error. + on_dialout_warning: Called when dial-out has a warning. + on_participant_joined: Called when a participant joins. + on_participant_left: Called when a participant leaves. + on_participant_updated: Called when participant info is updated. + on_transcription_message: Called when receiving transcription. + on_transcription_stopped: Called when transcription is stopped. + on_transcription_error: Called when transcription encounters an error. + on_recording_started: Called when recording starts. + on_recording_stopped: Called when recording stops. 
+ on_recording_error: Called when recording encounters an error. + """ + + on_active_speaker_changed: Callable[[Mapping[str, Any]], Awaitable[None]] + on_joined: Callable[[Mapping[str, Any]], Awaitable[None]] + on_left: Callable[[], Awaitable[None]] + on_error: Callable[[str], Awaitable[None]] + on_app_message: Callable[[Any, str], Awaitable[None]] + on_call_state_updated: Callable[[str], Awaitable[None]] + on_client_connected: Callable[[Mapping[str, Any]], Awaitable[None]] + on_client_disconnected: Callable[[Mapping[str, Any]], Awaitable[None]] + on_dialin_connected: Callable[[Any], Awaitable[None]] + on_dialin_ready: Callable[[str], Awaitable[None]] + on_dialin_stopped: Callable[[Any], Awaitable[None]] + on_dialin_error: Callable[[Any], Awaitable[None]] + on_dialin_warning: Callable[[Any], Awaitable[None]] + on_dialout_answered: Callable[[Any], Awaitable[None]] + on_dialout_connected: Callable[[Any], Awaitable[None]] + on_dialout_stopped: Callable[[Any], Awaitable[None]] + on_dialout_error: Callable[[Any], Awaitable[None]] + on_dialout_warning: Callable[[Any], Awaitable[None]] + on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] + on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] + on_participant_updated: Callable[[Mapping[str, Any]], Awaitable[None]] + on_transcription_message: Callable[[Mapping[str, Any]], Awaitable[None]] + on_transcription_stopped: Callable[[str, bool], Awaitable[None]] + on_transcription_error: Callable[[str], Awaitable[None]] + on_recording_started: Callable[[Mapping[str, Any]], Awaitable[None]] + on_recording_stopped: Callable[[str], Awaitable[None]] + on_recording_error: Callable[[str, str], Awaitable[None]] + + +def completion_callback(future): + """Create a completion callback for Daily API calls. + + Args: + future: The asyncio Future to set the result on. + + Returns: + A callback function that sets the future result. 
+ """ + + def _callback(*args): + def set_result(future, *args): + try: + if len(args) > 1: + future.set_result(args) + else: + future.set_result(*args) + except asyncio.InvalidStateError: + pass + + future.get_loop().call_soon_threadsafe(set_result, future, *args) + + return _callback + + +@dataclass +class DailyAudioTrack: + """Container for Daily audio track components. + + Parameters: + source: The custom audio source for the track. + track: The custom audio track instance. + """ + + source: CustomAudioSource + track: CustomAudioTrack + + +class DailyTransportClient(EventHandler): + """Core client for interacting with Daily's API. + + Manages the connection to Daily rooms and handles all low-level API interactions + including room management, media streaming, transcription, and event handling. + """ + + _daily_initialized: bool = False + + def __new__(cls, *args, **kwargs): + """Override EventHandler's __new__ method to ensure Daily is initialized only once.""" + return super().__new__(cls) + + def __init__( + self, + room_url: str, + token: Optional[str], + bot_name: str, + params: DailyParams, + callbacks: DailyCallbacks, + transport_name: str, + ): + """Initialize the Daily transport client. + + Args: + room_url: URL of the Daily room to connect to. + token: Optional authentication token for the room. + bot_name: Display name for the bot in the call. + params: Configuration parameters for the transport. + callbacks: Event callback handlers. + transport_name: Name identifier for the transport. 
+ """ + super().__init__() + + if not DailyTransportClient._daily_initialized: + DailyTransportClient._daily_initialized = True + Daily.init() + + self._room_url: str = room_url + self._token: Optional[str] = token + self._bot_name: str = bot_name + self._params: DailyParams = params + self._callbacks = callbacks + self._transport_name = transport_name + + self._participant_id: str = "" + self._audio_renderers = {} + self._video_renderers = {} + self._transcription_ids = [] + self._transcription_status = None + self._dial_out_session_id: str = "" + + self._joining = False + self._joined = False + self._joined_event = asyncio.Event() + self._leave_counter = 0 + + self._task_manager: Optional[BaseTaskManager] = None + + # We use the executor to cleanup the client. We just do it from one + # place, so only one thread is really needed. + self._executor = ThreadPoolExecutor(max_workers=1) + + self._client: CallClient = CallClient(event_handler=self) + + # We use separate tasks to execute callbacks (events, audio or + # video). In the case of events, if we call a `CallClient` function + # inside the callback and wait for its completion this will result in a + # deadlock (because we haven't exited the event callback). The deadlocks + # occur because `daily-python` is holding the GIL when calling the + # callbacks. So, if our callback handler makes a `CallClient` call and + # waits for it to finish using completions (and a future) we will + # deadlock because completions use event handlers (which are holding the + # GIL). + self._event_task = None + self._audio_task = None + self._video_task = None + + # Input and ouput sample rates. They will be initialize on setup(). 
+ self._in_sample_rate = 0 + self._out_sample_rate = 0 + + self._camera: Optional[VirtualCameraDevice] = None + self._speaker: Optional[VirtualSpeakerDevice] = None + self._microphone_track: Optional[DailyAudioTrack] = None + self._custom_audio_tracks: Dict[str, DailyAudioTrack] = {} + + def _camera_name(self): + """Generate a unique virtual camera name for this client instance.""" + return f"camera-{self}" + + def _speaker_name(self): + """Generate a unique virtual speaker name for this client instance.""" + return f"speaker-{self}" + + @property + def room_url(self) -> str: + """Get the Daily room URL. + + Returns: + The room URL this client is connected to. + """ + return self._room_url + + @property + def participant_id(self) -> str: + """Get the participant ID for this client. + + Returns: + The participant ID assigned by Daily. + """ + return self._participant_id + + @property + def in_sample_rate(self) -> int: + """Get the input audio sample rate. + + Returns: + The input sample rate in Hz. + """ + return self._in_sample_rate + + @property + def out_sample_rate(self) -> int: + """Get the output audio sample rate. + + Returns: + The output sample rate in Hz. + """ + return self._out_sample_rate + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send an application message to participants. + + Args: + frame: The message frame to send. 
+ """ + if not self._joined: + return + + participant_id = None + if isinstance(frame, (DailyTransportMessageFrame, DailyTransportMessageUrgentFrame)): + participant_id = frame.participant_id + + future = self._get_event_loop().create_future() + self._client.send_app_message( + frame.message, participant_id, completion=completion_callback(future) + ) + await future + + async def read_next_audio_frame(self) -> Optional[InputAudioRawFrame]: + """Reads the next 20ms audio frame from the virtual speaker.""" + if not self._speaker: + return None + + sample_rate = self._in_sample_rate + num_channels = self._params.audio_in_channels + num_frames = int(sample_rate / 100) * 2 # 20ms of audio + + future = self._get_event_loop().create_future() + self._speaker.read_frames(num_frames, completion=completion_callback(future)) + audio = await future + + if len(audio) > 0: + return InputAudioRawFrame( + audio=audio, sample_rate=sample_rate, num_channels=num_channels + ) + else: + # If we don't read any audio it could be there's no participant + # connected. daily-python will return immediately if that's the + # case, so let's sleep for a little bit (i.e. busy wait). + await asyncio.sleep(0.01) + return None + + async def register_audio_destination(self, destination: str): + """Register a custom audio destination for multi-track output. + + Args: + destination: The destination identifier to register. + """ + self._custom_audio_tracks[destination] = await self.add_custom_audio_track(destination) + self._client.update_publishing({"customAudio": {destination: True}}) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the appropriate audio track. + + Args: + frame: The audio frame to write. 
+ """ + future = self._get_event_loop().create_future() + + destination = frame.transport_destination + audio_source: Optional[CustomAudioSource] = None + if not destination and self._microphone_track: + audio_source = self._microphone_track.source + elif destination and destination in self._custom_audio_tracks: + track = self._custom_audio_tracks[destination] + audio_source = track.source + + if audio_source: + audio_source.write_frames(frame.audio, completion=completion_callback(future)) + else: + logger.warning(f"{self} unable to write audio frames to destination [{destination}]") + future.set_result(None) + + await future + + async def write_video_frame(self, frame: OutputImageRawFrame): + """Write a video frame to the camera device. + + Args: + frame: The image frame to write. + """ + if not frame.transport_destination and self._camera: + self._camera.write_frame(frame.image) + + async def setup(self, setup: FrameProcessorSetup): + """Setup the client with task manager and event queues. + + Args: + setup: The frame processor setup configuration. + """ + if self._task_manager: + return + + self._task_manager = setup.task_manager + + self._event_queue = asyncio.Queue() + self._event_task = self._task_manager.create_task( + self._callback_task_handler(self._event_queue), + f"{self}::event_callback_task", + ) + + async def cleanup(self): + """Cleanup client resources and cancel tasks.""" + if self._event_task and self._task_manager: + await self._task_manager.cancel_task(self._event_task) + self._event_task = None + if self._audio_task and self._task_manager: + await self._task_manager.cancel_task(self._audio_task) + self._audio_task = None + if self._video_task and self._task_manager: + await self._task_manager.cancel_task(self._video_task) + self._video_task = None + # Make sure we don't block the event loop in case `client.release()` + # takes extra time. 
+ await self._get_event_loop().run_in_executor(self._executor, self._cleanup) + + async def start(self, frame: StartFrame): + """Start the client and initialize audio/video components. + + Args: + frame: The start frame containing initialization parameters. + """ + self._in_sample_rate = self._params.audio_in_sample_rate or frame.audio_in_sample_rate + self._out_sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate + + if self._params.audio_in_enabled: + if self._params.audio_in_user_tracks and not self._audio_task and self._task_manager: + self._audio_queue = asyncio.Queue() + self._audio_task = self._task_manager.create_task( + self._callback_task_handler(self._audio_queue), + f"{self}::audio_callback_task", + ) + elif not self._speaker: + self._speaker = Daily.create_speaker_device( + self._speaker_name(), + sample_rate=self._in_sample_rate, + channels=self._params.audio_in_channels, + non_blocking=True, + ) + Daily.select_speaker_device(self._speaker_name()) + + if self._params.video_in_enabled and not self._video_task and self._task_manager: + self._video_queue = asyncio.Queue() + self._video_task = self._task_manager.create_task( + self._callback_task_handler(self._video_queue), + f"{self}::video_callback_task", + ) + if self._params.video_out_enabled and not self._camera: + self._camera = Daily.create_camera_device( + self._camera_name(), + width=self._params.video_out_width, + height=self._params.video_out_height, + color_format=self._params.video_out_color_format, + ) + + if self._params.audio_out_enabled and not self._microphone_track: + audio_source = CustomAudioSource(self._out_sample_rate, self._params.audio_out_channels) + audio_track = CustomAudioTrack(audio_source) + self._microphone_track = DailyAudioTrack(source=audio_source, track=audio_track) + + async def join(self): + """Join the Daily room with configured settings.""" + # Transport already joined or joining, ignore. 
+ if self._joined or self._joining: + # Increment leave counter if we already joined. + self._leave_counter += 1 + return + + logger.info(f"Joining {self._room_url}") + self._joining = True + + # For performance reasons, never subscribe to video streams (unless a + # video renderer is registered). + self._client.update_subscription_profiles( + {"base": {"camera": "unsubscribed", "screenVideo": "unsubscribed"}} + ) + + self._client.set_user_name(self._bot_name) + + try: + (data, error) = await self._join() + + if not error: + self._joined = True + self._joining = False + # Increment leave counter if we successfully joined. + self._leave_counter += 1 + + logger.info(f"Joined {self._room_url}") + + if self._params.transcription_enabled: + await self.start_transcription(self._params.transcription_settings) + + await self._callbacks.on_joined(data) + + self._joined_event.set() + else: + error_msg = f"Error joining {self._room_url}: {error}" + logger.error(error_msg) + await self._callbacks.on_error(error_msg) + except asyncio.TimeoutError: + error_msg = f"Time out joining {self._room_url}" + logger.error(error_msg) + self._joining = False + await self._callbacks.on_error(error_msg) + + async def _join(self): + """Execute the actual room join operation.""" + if not self._client: + return + + future = self._get_event_loop().create_future() + + camera_enabled = self._params.video_out_enabled and self._params.camera_out_enabled + microphone_enabled = self._params.audio_out_enabled and self._params.microphone_out_enabled + + self._client.join( + self._room_url, + self._token, + completion=completion_callback(future), + client_settings={ + "inputs": { + "camera": { + "isEnabled": camera_enabled, + "settings": { + "deviceId": self._camera_name(), + }, + }, + "microphone": { + "isEnabled": microphone_enabled, + "settings": { + "customTrack": { + "id": self._microphone_track.track.id + if self._microphone_track + else "no-microphone-track" + } + }, + }, + }, + "publishing": { + 
"camera": { + "sendSettings": { + "maxQuality": "low", + "encodings": { + "low": { + "maxBitrate": self._params.video_out_bitrate, + "maxFramerate": self._params.video_out_framerate, + } + }, + } + }, + "microphone": { + "sendSettings": { + "channelConfig": "stereo" + if self._params.audio_out_channels == 2 + else "mono", + "bitrate": self._params.audio_out_bitrate, + } + }, + }, + }, + ) + + return await asyncio.wait_for(future, timeout=10) + + async def leave(self): + """Leave the Daily room and cleanup resources.""" + # Decrement leave counter when leaving. + self._leave_counter -= 1 + + # Transport not joined, ignore. + if not self._joined or self._leave_counter > 0: + return + + self._joined = False + self._joined_event.clear() + + logger.info(f"Leaving {self._room_url}") + + if self._params.transcription_enabled: + await self.stop_transcription() + + # Remove any custom tracks, if any. + for track_name, _ in self._custom_audio_tracks.items(): + await self.remove_custom_audio_track(track_name) + + try: + error = await self._leave() + if not error: + logger.info(f"Left {self._room_url}") + await self._callbacks.on_left() + else: + error_msg = f"Error leaving {self._room_url}: {error}" + logger.error(error_msg) + await self._callbacks.on_error(error_msg) + except asyncio.TimeoutError: + error_msg = f"Time out leaving {self._room_url}" + logger.error(error_msg) + await self._callbacks.on_error(error_msg) + + async def _leave(self): + """Execute the actual room leave operation.""" + if not self._client: + return + + future = self._get_event_loop().create_future() + self._client.leave(completion=completion_callback(future)) + return await asyncio.wait_for(future, timeout=10) + + def _cleanup(self): + """Cleanup the Daily client instance.""" + if self._client: + self._client.release() + self._client = None + + def participants(self): + """Get current participants in the room. + + Returns: + Dictionary of participants keyed by participant ID. 
+ """ + return self._client.participants() + + def participant_counts(self): + """Get participant count information. + + Returns: + Dictionary with participant count details. + """ + return self._client.participant_counts() + + async def start_dialout(self, settings): + """Start a dial-out call to a phone number. + + Args: + settings: Dial-out configuration settings. + """ + logger.debug(f"Starting dialout: settings={settings}") + + future = self._get_event_loop().create_future() + self._client.start_dialout(settings, completion=completion_callback(future)) + error = await future + if error: + logger.error(f"Unable to start dialout: {error}") + + async def stop_dialout(self, participant_id): + """Stop a dial-out call for a specific participant. + + Args: + participant_id: ID of the participant to stop dial-out for. + """ + logger.debug(f"Stopping dialout: participant_id={participant_id}") + + future = self._get_event_loop().create_future() + self._client.stop_dialout(participant_id, completion=completion_callback(future)) + error = await future + if error: + logger.error(f"Unable to stop dialout: {error}") + + async def send_dtmf(self, settings): + """Send DTMF tones during a call. + + Args: + settings: DTMF settings including tones and target session. + """ + session_id = settings.get("sessionId") or self._dial_out_session_id + if not session_id: + logger.error("Unable to send DTMF: 'sessionId' is not set") + return + + # Update 'sessionId' field. + settings["sessionId"] = session_id + + future = self._get_event_loop().create_future() + self._client.send_dtmf(settings, completion=completion_callback(future)) + await future + + async def sip_call_transfer(self, settings): + """Transfer a SIP call to another destination. + + Args: + settings: SIP call transfer settings. 
async def sip_refer(self, settings):
    """Send a SIP REFER request.

    A failure reported by the client is logged, not raised.

    Args:
        settings: SIP REFER settings.
    """
    future = self._get_event_loop().create_future()
    self._client.sip_refer(settings, completion=completion_callback(future))
    error = await future
    if error:
        logger.error(f"Unable to send SIP REFER: {error}")


async def start_recording(self, streaming_settings, stream_id, force_new):
    """Start recording the call.

    A failure reported by the client is logged, not raised.

    Args:
        streaming_settings: Recording configuration settings.
        stream_id: Unique identifier for the recording stream.
        force_new: Whether to force a new recording session.
    """
    logger.debug(
        f"Starting recording: stream_id={stream_id} force_new={force_new} settings={streaming_settings}"
    )

    future = self._get_event_loop().create_future()
    self._client.start_recording(
        streaming_settings, stream_id, force_new, completion=completion_callback(future)
    )
    error = await future
    if error:
        logger.error(f"Unable to start recording: {error}")


async def stop_recording(self, stream_id):
    """Stop recording the call.

    A failure reported by the client is logged, not raised.

    Args:
        stream_id: Unique identifier for the recording stream to stop.
    """
    logger.debug(f"Stopping recording: stream_id={stream_id}")

    future = self._get_event_loop().create_future()
    self._client.stop_recording(stream_id, completion=completion_callback(future))
    error = await future
    if error:
        logger.error(f"Unable to stop recording: {error}")


async def start_transcription(self, settings):
    """Start transcription for the call.

    Does nothing (besides a warning) when the client has no room token. A
    failure reported by the client is logged, not raised.

    Args:
        settings: Transcription configuration settings, either a plain dict
            or an object exposing ``model_dump()``. When None, the
            transcription settings from the transport parameters are used.
    """
    if not self._token:
        logger.warning("Transcription can't be started without a room token")
        return

    # Honor the settings the caller passed; fall back to the configured ones.
    if settings is None:
        settings = self._params.transcription_settings
    if hasattr(settings, "model_dump"):
        settings = settings.model_dump(exclude_none=True)

    logger.debug(f"Starting transcription: settings={settings}")

    future = self._get_event_loop().create_future()
    self._client.start_transcription(
        settings=settings,
        completion=completion_callback(future),
    )
    error = await future
    if error:
        logger.error(f"Unable to start transcription: {error}")
async def stop_transcription(self):
    """Stop transcription for the call.

    Does nothing when the client has no room token. A failure reported by the
    client is logged, not raised.
    """
    if not self._token:
        return

    logger.debug("Stopping transcription")

    future = self._get_event_loop().create_future()
    self._client.stop_transcription(completion=completion_callback(future))
    error = await future
    if error:
        logger.error(f"Unable to stop transcription: {error}")


async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None):
    """Send a chat message to Daily's Prebuilt main room.

    Does nothing when the client has not joined a room.

    Args:
        message: The chat message to send.
        user_name: Optional user name that will appear as sender of the message.
    """
    if not self._joined:
        return

    future = self._get_event_loop().create_future()
    self._client.send_prebuilt_chat_message(
        message, user_name=user_name, completion=completion_callback(future)
    )
    await future


async def capture_participant_transcription(self, participant_id: str):
    """Enable transcription capture for a specific participant.

    Does nothing when transcription is disabled in the transport parameters.
    The participant is remembered once, however often this is called, and
    the running transcription (if joined and started) is updated with the
    full list of remembered participants.

    Args:
        participant_id: ID of the participant to capture transcription for.
    """
    if not self._params.transcription_enabled:
        return

    # Avoid duplicate entries when the same participant is requested again.
    if participant_id not in self._transcription_ids:
        self._transcription_ids.append(participant_id)

    if self._joined and self._transcription_status:
        await self.update_transcription(self._transcription_ids)
async def capture_participant_audio(
    self,
    participant_id: str,
    callback: Callable,
    audio_source: str = "microphone",
    sample_rate: int = 16000,
    callback_interval_ms: int = 20,
):
    """Subscribe to a participant's audio and route it to ``callback``.

    Args:
        participant_id: ID of the participant to capture audio from.
        callback: Callback function to handle audio data.
        audio_source: Audio source to capture (microphone, screenAudio, or custom).
        sample_rate: Desired sample rate for audio capture.
        callback_interval_ms: Interval between audio callbacks in milliseconds.
    """
    # Subscribe only to the requested source; anything other than the two
    # built-in sources is addressed as a custom audio track.
    subscription = {audio_source: "subscribed"}
    if audio_source not in ("microphone", "screenAudio"):
        subscription = {"customAudio": subscription}

    await self.update_subscriptions(
        participant_settings={participant_id: {"media": subscription}}
    )

    # Remember who handles this (participant, source) pair; the shared
    # renderer registered below looks the callback up here.
    participant_renderers = self._audio_renderers.setdefault(participant_id, {})
    participant_renderers[audio_source] = callback

    logger.debug(
        f"Starting to capture [{audio_source}] audio from participant {participant_id}"
    )

    self._client.set_audio_renderer(
        participant_id,
        self._audio_data_received,
        audio_source=audio_source,
        sample_rate=sample_rate,
        callback_interval_ms=callback_interval_ms,
    )


async def capture_participant_video(
    self,
    participant_id: str,
    callback: Callable,
    framerate: int = 30,
    video_source: str = "camera",
    color_format: str = "RGB",
):
    """Subscribe to a participant's video and route frames to ``callback``.

    Args:
        participant_id: ID of the participant to capture video from.
        callback: Callback function to handle video frames.
        framerate: Desired framerate for video capture. Not used by this
            method; it is not forwarded to the renderer.
        video_source: Video source to capture (camera, screenVideo, or custom).
        color_format: Color format for video frames.
    """
    # Subscribe only to the requested video source; anything other than the
    # two built-in sources is addressed as a custom video track.
    subscription = {video_source: "subscribed"}
    if video_source not in ("camera", "screenVideo"):
        subscription = {"customVideo": subscription}

    await self.update_subscriptions(
        participant_settings={participant_id: {"media": subscription}}
    )

    participant_renderers = self._video_renderers.setdefault(participant_id, {})
    participant_renderers[video_source] = callback

    logger.debug(
        f"Starting to capture [{video_source}] video from participant {participant_id}"
    )

    self._client.set_video_renderer(
        participant_id,
        self._video_frame_received,
        video_source=video_source,
        color_format=color_format,
    )
async def add_custom_audio_track(self, track_name: str) -> DailyAudioTrack:
    """Create and publish a custom audio track for multi-stream output.

    Args:
        track_name: Name for the custom audio track.

    Returns:
        A DailyAudioTrack bundling the new audio source and its track.
    """
    done = self._get_event_loop().create_future()

    # Second argument is presumably the channel count (mono) -- TODO confirm.
    source = CustomAudioSource(self._out_sample_rate, 1)
    custom_track = CustomAudioTrack(source)

    self._client.add_custom_audio_track(
        track_name=track_name,
        audio_track=custom_track,
        ignore_audio_level=True,
        completion=completion_callback(done),
    )
    await done

    return DailyAudioTrack(source=source, track=custom_track)


async def remove_custom_audio_track(self, track_name: str):
    """Ask the client to remove a custom audio track and wait for completion.

    Args:
        track_name: Name of the custom audio track to remove.
    """
    done = self._get_event_loop().create_future()
    self._client.remove_custom_audio_track(
        track_name=track_name,
        completion=completion_callback(done),
    )
    await done


async def update_transcription(self, participants=None, instance_id=None):
    """Update which participants are transcribed and wait for completion.

    Args:
        participants: List of participant IDs to enable transcription for.
        instance_id: Optional transcription instance ID.
    """
    done = self._get_event_loop().create_future()
    self._client.update_transcription(
        participants, instance_id, completion=completion_callback(done)
    )
    await done
async def update_subscriptions(self, participant_settings=None, profile_settings=None):
    """Apply media subscription settings and wait for completion.

    Args:
        participant_settings: Per-participant subscription settings.
        profile_settings: Global subscription profile settings.
    """
    done = self._get_event_loop().create_future()
    self._client.update_subscriptions(
        participant_settings=participant_settings,
        profile_settings=profile_settings,
        completion=completion_callback(done),
    )
    await done


async def update_publishing(self, publishing_settings: Mapping[str, Any]):
    """Apply media publishing settings and wait for completion.

    Args:
        publishing_settings: Publishing configuration settings.
    """
    done = self._get_event_loop().create_future()
    self._client.update_publishing(
        publishing_settings=publishing_settings,
        completion=completion_callback(done),
    )
    await done


async def update_remote_participants(self, remote_participants: Mapping[str, Any]):
    """Apply settings to remote participants and wait for completion.

    Args:
        remote_participants: Remote participant configuration settings.
    """
    done = self._get_event_loop().create_future()
    self._client.update_remote_participants(
        remote_participants=remote_participants, completion=completion_callback(done)
    )
    await done


#
# Daily (EventHandler)
#
# Each handler below only queues the matching user callback on the event
# queue; the callback itself runs later on the event loop.
#


def on_active_speaker_changed(self, participant):
    """Queue the active-speaker-changed callback.

    Args:
        participant: The new active speaker participant info.
    """
    handler = self._callbacks.on_active_speaker_changed
    self._call_event_callback(handler, participant)


def on_app_message(self, message: Any, sender: str):
    """Queue the app-message callback.

    Args:
        message: The received message data.
        sender: ID of the message sender.
    """
    handler = self._callbacks.on_app_message
    self._call_event_callback(handler, message, sender)
def on_call_state_updated(self, state: str):
    """Queue the call-state-updated callback.

    Args:
        state: The new call state.
    """
    self._call_event_callback(self._callbacks.on_call_state_updated, state)


def on_dialin_connected(self, data: Any):
    """Queue the dial-in connected callback.

    Args:
        data: Dial-in connection data.
    """
    self._call_event_callback(self._callbacks.on_dialin_connected, data)


def on_dialin_ready(self, sip_endpoint: str):
    """Queue the dial-in ready callback.

    Args:
        sip_endpoint: The SIP endpoint for dial-in.
    """
    self._call_event_callback(self._callbacks.on_dialin_ready, sip_endpoint)


def on_dialin_stopped(self, data: Any):
    """Queue the dial-in stopped callback.

    Args:
        data: Dial-in stop data.
    """
    self._call_event_callback(self._callbacks.on_dialin_stopped, data)


def on_dialin_error(self, data: Any):
    """Queue the dial-in error callback.

    Args:
        data: Dial-in error data.
    """
    self._call_event_callback(self._callbacks.on_dialin_error, data)


def on_dialin_warning(self, data: Any):
    """Queue the dial-in warning callback.

    Args:
        data: Dial-in warning data.
    """
    self._call_event_callback(self._callbacks.on_dialin_warning, data)


def on_dialout_answered(self, data: Any):
    """Queue the dial-out answered callback.

    Args:
        data: Dial-out answered data.
    """
    self._call_event_callback(self._callbacks.on_dialout_answered, data)


def on_dialout_connected(self, data: Any):
    """Remember the dial-out session and queue the connected callback.

    The session ID is stored as an empty string when the event carries no
    ``sessionId``.

    Args:
        data: Dial-out connection data.
    """
    self._dial_out_session_id = data["sessionId"] if "sessionId" in data else ""
    self._call_event_callback(self._callbacks.on_dialout_connected, data)


def on_dialout_stopped(self, data: Any):
    """Forget the dial-out session if it stopped and queue the callback.

    An event without a ``sessionId`` leaves the stored session untouched
    instead of raising; the callback is queued in every case.

    Args:
        data: Dial-out stop data.
    """
    # Cleanup only if our session stopped.
    if "sessionId" in data and data["sessionId"] == self._dial_out_session_id:
        self._dial_out_session_id = ""
    self._call_event_callback(self._callbacks.on_dialout_stopped, data)
def on_dialout_error(self, data: Any):
    """Forget the dial-out session if it errored out and queue the callback.

    An event without a ``sessionId`` leaves the stored session untouched
    instead of raising; the callback is queued in every case.

    Args:
        data: Dial-out error data.
    """
    # Cleanup only if our session errored out.
    if "sessionId" in data and data["sessionId"] == self._dial_out_session_id:
        self._dial_out_session_id = ""
    self._call_event_callback(self._callbacks.on_dialout_error, data)


def on_dialout_warning(self, data: Any):
    """Queue the dial-out warning callback.

    Args:
        data: Dial-out warning data.
    """
    self._call_event_callback(self._callbacks.on_dialout_warning, data)


def on_participant_joined(self, participant):
    """Queue the participant-joined callback.

    Args:
        participant: The participant that joined.
    """
    self._call_event_callback(self._callbacks.on_participant_joined, participant)


def on_participant_left(self, participant, reason):
    """Queue the participant-left callback.

    Args:
        participant: The participant that left.
        reason: Reason for leaving.
    """
    self._call_event_callback(self._callbacks.on_participant_left, participant, reason)


def on_participant_updated(self, participant):
    """Queue the participant-updated callback.

    Args:
        participant: The updated participant info.
    """
    self._call_event_callback(self._callbacks.on_participant_updated, participant)


def on_transcription_started(self, status):
    """Record the transcription status and re-apply the captured participants.

    Instead of a user callback, this queues ``update_transcription`` with the
    participant IDs collected so far.

    Args:
        status: Transcription start status.
    """
    logger.debug(f"Transcription started: {status}")
    self._transcription_status = status
    self._call_event_callback(self.update_transcription, self._transcription_ids)


def on_transcription_stopped(self, stopped_by, stopped_by_error):
    """Queue the transcription-stopped callback.

    Args:
        stopped_by: Who stopped the transcription.
        stopped_by_error: Whether stopped due to error.
    """
    logger.debug("Transcription stopped")
    self._call_event_callback(
        self._callbacks.on_transcription_stopped, stopped_by, stopped_by_error
    )
def on_transcription_error(self, message):
    """Log a transcription error and queue the matching callback.

    Args:
        message: Error message.
    """
    logger.error(f"Transcription error: {message}")
    handler = self._callbacks.on_transcription_error
    self._call_event_callback(handler, message)


def on_transcription_message(self, message):
    """Queue the transcription-message callback.

    Args:
        message: The transcription message data.
    """
    handler = self._callbacks.on_transcription_message
    self._call_event_callback(handler, message)


def on_recording_started(self, status):
    """Log the recording start and queue the matching callback.

    Args:
        status: Recording start status.
    """
    logger.debug(f"Recording started: {status}")
    handler = self._callbacks.on_recording_started
    self._call_event_callback(handler, status)


def on_recording_stopped(self, stream_id):
    """Log the recording stop and queue the matching callback.

    Args:
        stream_id: ID of the stopped recording stream.
    """
    logger.debug(f"Recording stopped: {stream_id}")
    handler = self._callbacks.on_recording_stopped
    self._call_event_callback(handler, stream_id)


def on_recording_error(self, stream_id, message):
    """Log a recording error and queue the matching callback.

    Args:
        stream_id: ID of the recording stream with error.
        message: Error message.
    """
    logger.error(f"Recording error for {stream_id}: {message}")
    handler = self._callbacks.on_recording_error
    self._call_event_callback(handler, stream_id, message)
#
# Daily (CallClient callbacks)
#


def _audio_data_received(self, participant_id: str, audio_data: AudioData, audio_source: str):
    """Dispatch received participant audio to the callback registered for it."""
    callback = self._audio_renderers[participant_id][audio_source]
    self._call_audio_callback(callback, participant_id, audio_data, audio_source)


def _video_frame_received(
    self, participant_id: str, video_frame: VideoFrame, video_source: str
):
    """Dispatch a received participant video frame to its registered callback."""
    callback = self._video_renderers[participant_id][video_source]
    self._call_video_callback(callback, participant_id, video_frame, video_source)


#
# Queue callbacks handling
#


def _call_audio_callback(self, callback, *args):
    """Queue an audio callback for async execution."""
    self._call_async_callback(self._audio_queue, callback, *args)


def _call_video_callback(self, callback, *args):
    """Queue a video callback for async execution."""
    self._call_async_callback(self._video_queue, callback, *args)


def _call_event_callback(self, callback, *args):
    """Queue an event callback for async execution."""
    self._call_async_callback(self._event_queue, callback, *args)


def _call_async_callback(self, queue: asyncio.Queue, callback, *args):
    """Put ``(callback, *args)`` on ``queue`` from outside the event loop.

    The put is scheduled on the client's event loop and the calling thread
    blocks until it has completed. A cancelled put is ignored.
    """
    try:
        future = asyncio.run_coroutine_threadsafe(
            queue.put((callback, *args)), self._get_event_loop()
        )
        future.result()
    except FuturesCancelledError:
        pass


async def _callback_task_handler(self, queue: asyncio.Queue):
    """Run queued callbacks from ``queue`` forever.

    An exception raised by a callback is logged and the loop keeps serving
    the queue; every dequeued item is marked done whether or not its
    callback succeeded.
    """
    while True:
        # Wait to process any callback until we are joined.
        await self._joined_event.wait()
        (callback, *args) = await queue.get()
        try:
            await callback(*args)
        except Exception as e:
            # One faulty callback must not stop delivery of later ones.
            # Task cancellation is not an Exception and still propagates.
            logger.error(f"{self} exception running callback {callback}: {e}")
        finally:
            queue.task_done()
def _get_event_loop(self) -> asyncio.AbstractEventLoop:
    """Return the event loop owned by the task manager.

    Raises:
        Exception: If no task manager has been set on the client yet.
    """
    task_manager = self._task_manager
    if task_manager:
        return task_manager.get_event_loop()
    raise Exception(f"{self}: missing task manager (pipeline not started?)")


def __str__(self):
    """Return ``<transport name>::DailyTransportClient``."""
    transport_name = self._transport_name
    return f"{transport_name}::DailyTransportClient"
class DailyInputTransport(BaseInputTransport):
    """Handles incoming media streams and events from Daily calls.

    Joins the Daily room through the shared client, captures participant
    audio and video, and pushes the resulting frames, transcriptions and app
    messages into the pipeline.
    """

    def __init__(
        self,
        transport: BaseTransport,
        client: DailyTransportClient,
        params: DailyParams,
        **kwargs,
    ):
        """Initialize the Daily input transport.

        Args:
            transport: The parent transport instance.
            client: DailyTransportClient instance.
            params: Configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)

        self._transport = transport
        self._client = client
        self._params = params

        # Per participant, per video source: capture settings and the queue
        # of pending image requests.
        self._video_renderers = {}

        # Whether we have seen a StartFrame already.
        self._initialized = False

        # Whether we have started audio streaming.
        self._streaming_started = False

        # Store the list of participants we should stream. This is necessary in
        # case we don't start streaming right away.
        self._capture_participant_audio = []

        # Audio task when using a virtual speaker (i.e. no user tracks).
        self._audio_in_task: Optional[asyncio.Task] = None

        self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer

    @property
    def vad_analyzer(self) -> Optional[VADAnalyzer]:
        """Get the Voice Activity Detection analyzer.

        Returns:
            The VAD analyzer instance if configured.
        """
        return self._vad_analyzer

    async def start_audio_in_streaming(self):
        """Start receiving audio from participants.

        Does nothing when audio input is disabled. With user tracks enabled,
        every capture requested so far is started on the client; otherwise a
        single reader task for the room audio is created (once).
        """
        if not self._params.audio_in_enabled:
            return

        logger.debug("Start receiving audio")

        if self._params.audio_in_user_tracks:
            # Capture individual participant tracks.
            for participant_id, audio_source, sample_rate in self._capture_participant_audio:
                await self._client.capture_participant_audio(
                    participant_id, self._on_participant_audio_data, audio_source, sample_rate
                )
        elif not self._audio_in_task:
            # Create audio task. It reads audio frames from a single room
            # track and pushes them internally for VAD processing.
            self._audio_in_task = self.create_task(self._audio_in_task_handler())

        self._streaming_started = True

    async def setup(self, setup: FrameProcessorSetup):
        """Setup the input transport with shared client setup.

        Args:
            setup: The frame processor setup configuration.
        """
        await super().setup(setup)
        await self._client.setup(setup)

    async def cleanup(self):
        """Cleanup input transport and shared resources."""
        await super().cleanup()
        await self._client.cleanup()
        await self._transport.cleanup()

    async def start(self, frame: StartFrame):
        """Start the input transport and join the Daily room.

        Only the first call joins the room; later calls stop after the
        parent ``start``.

        Args:
            frame: The start frame containing initialization parameters.
        """
        # Parent start.
        await super().start(frame)

        if self._initialized:
            return

        self._initialized = True

        # Setup client.
        await self._client.start(frame)

        # Join the room.
        await self._client.join()

        # Indicate the transport that we are connected.
        await self.set_transport_ready(frame)

        if self._params.audio_in_stream_on_start:
            await self.start_audio_in_streaming()

    async def stop(self, frame: EndFrame):
        """Stop the input transport and leave the Daily room.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        # Parent stop.
        await super().stop(frame)
        await self._leave_and_stop_audio()

    async def cancel(self, frame: CancelFrame):
        """Cancel the input transport and leave the Daily room.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        # Parent cancel.
        await super().cancel(frame)
        await self._leave_and_stop_audio()

    async def _leave_and_stop_audio(self):
        """Leave the room and cancel the room-audio reader task, if running."""
        # Leave the room.
        await self._client.leave()
        # Stop audio task.
        if self._audio_in_task:
            await self.cancel_task(self._audio_in_task)
            self._audio_in_task = None

    #
    # FrameProcessor
    #

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames, including user image requests.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserImageRequestFrame):
            await self.request_participant_image(frame)

    #
    # Frames
    #

    async def push_transcription_frame(self, frame: TranscriptionFrame | InterimTranscriptionFrame):
        """Push a transcription frame downstream.

        Args:
            frame: The transcription frame to push.
        """
        await self.push_frame(frame)

    async def push_app_message(self, message: Any, sender: str):
        """Push an application message as an urgent transport frame.

        Args:
            message: The message data to send.
            sender: ID of the message sender.
        """
        frame = DailyTransportMessageUrgentFrame(message=message, participant_id=sender)
        await self.push_frame(frame)

    #
    # Audio in
    #

    async def capture_participant_audio(
        self,
        participant_id: str,
        audio_source: str = "microphone",
        sample_rate: int = 16000,
    ):
        """Capture audio from a specific participant.

        When audio streaming has not started yet, the request is stored and
        applied by ``start_audio_in_streaming``.

        Args:
            participant_id: ID of the participant to capture audio from.
            audio_source: Audio source to capture from.
            sample_rate: Desired sample rate for audio capture.
        """
        if self._streaming_started:
            await self._client.capture_participant_audio(
                participant_id, self._on_participant_audio_data, audio_source, sample_rate
            )
        else:
            self._capture_participant_audio.append((participant_id, audio_source, sample_rate))

    async def _on_participant_audio_data(
        self, participant_id: str, audio: AudioData, audio_source: str
    ):
        """Wrap received participant audio in a frame and push it."""
        frame = UserAudioRawFrame(
            user_id=participant_id,
            audio=audio.audio_frames,
            sample_rate=audio.sample_rate,
            num_channels=audio.num_channels,
        )
        frame.transport_source = audio_source
        await self.push_audio_frame(frame)

    async def _audio_in_task_handler(self):
        """Read audio frames from the client forever and push each one."""
        while True:
            frame = await self._client.read_next_audio_frame()
            if frame:
                await self.push_audio_frame(frame)

    #
    # Camera in
    #

    async def capture_participant_video(
        self,
        participant_id: str,
        framerate: int = 30,
        video_source: str = "camera",
        color_format: str = "RGB",
    ):
        """Capture video from a specific participant.

        Args:
            participant_id: ID of the participant to capture video from.
            framerate: Desired framerate for video capture. With 0 (or less)
                frames are only pushed in response to image requests.
            video_source: Video source to capture from.
            color_format: Color format for video frames.
        """
        if participant_id not in self._video_renderers:
            self._video_renderers[participant_id] = {}

        self._video_renderers[participant_id][video_source] = {
            "framerate": framerate,
            "timestamp": 0,
            "render_next_frame": [],
        }

        await self._client.capture_participant_video(
            participant_id, self._on_participant_video_frame, framerate, video_source, color_format
        )

    async def request_participant_image(self, frame: UserImageRequestFrame):
        """Request a video frame from a specific participant.

        The request is queued and answered with the next frame received from
        the requested source (``camera`` when the request names none).
        Requests for participants that are not captured are ignored; requests
        for a source that is not captured are ignored with a warning.

        Args:
            frame: The user image request frame.
        """
        if frame.user_id not in self._video_renderers:
            return

        video_source = frame.video_source if frame.video_source else "camera"
        renderer = self._video_renderers[frame.user_id].get(video_source)
        if renderer is None:
            logger.warning(
                f"{self} unable to request image: video source [{video_source}] "
                f"of participant {frame.user_id} is not being captured"
            )
            return

        renderer["render_next_frame"].append(frame)

    async def _on_participant_video_frame(
        self, participant_id: str, video_frame: VideoFrame, video_source: str
    ):
        """Push a received video frame when it is due or was requested."""
        render_frame = False

        curr_time = time.time()
        prev_time = self._video_renderers[participant_id][video_source]["timestamp"]
        framerate = self._video_renderers[participant_id][video_source]["framerate"]

        # Some times we render frames because of a request.
        request_frame = None

        if framerate > 0:
            # Render when the next frame slot is less than 0.1s away.
            next_time = prev_time + 1 / framerate
            render_frame = (next_time - curr_time) < 0.1

        if self._video_renderers[participant_id][video_source]["render_next_frame"]:
            request_frame = self._video_renderers[participant_id][video_source][
                "render_next_frame"
            ].pop(0)
            render_frame = True

        if render_frame:
            frame = UserImageRawFrame(
                user_id=participant_id,
                request=request_frame,
                image=video_frame.buffer,
                size=(video_frame.width, video_frame.height),
                format=video_frame.color_format,
            )
            frame.transport_source = video_source
            await self.push_video_frame(frame)
            self._video_renderers[participant_id][video_source]["timestamp"] = curr_time
class DailyOutputTransport(BaseOutputTransport):
    """Handles outgoing media streams and events to Daily calls.

    Every operation is delegated to the shared Daily client: joining and
    leaving the room, sending messages, registering audio destinations and
    writing audio and video frames.
    """

    def __init__(
        self, transport: BaseTransport, client: DailyTransportClient, params: DailyParams, **kwargs
    ):
        """Initialize the Daily output transport.

        Args:
            transport: The parent transport instance.
            client: DailyTransportClient instance.
            params: Configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)

        self._client = client
        self._transport = transport

        # Whether we have seen a StartFrame already.
        self._initialized = False

    async def setup(self, setup: FrameProcessorSetup):
        """Run the parent setup, then the shared client setup.

        Args:
            setup: The frame processor setup configuration.
        """
        await super().setup(setup)
        await self._client.setup(setup)

    async def cleanup(self):
        """Clean up the parent, the shared client and the owning transport."""
        await super().cleanup()
        await self._client.cleanup()
        await self._transport.cleanup()

    async def start(self, frame: StartFrame):
        """Start the output transport and join the Daily room.

        Only the first call joins the room; later calls stop after the
        parent ``start``.

        Args:
            frame: The start frame containing initialization parameters.
        """
        # Parent start.
        await super().start(frame)

        if not self._initialized:
            self._initialized = True

            # Setup client.
            await self._client.start(frame)

            # Join the room.
            await self._client.join()

            # Indicate the transport that we are connected.
            await self.set_transport_ready(frame)

    async def stop(self, frame: EndFrame):
        """Stop the output transport and leave the Daily room.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        # Parent stop, then leave the room.
        await super().stop(frame)
        await self._client.leave()

    async def cancel(self, frame: CancelFrame):
        """Cancel the output transport and leave the Daily room.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        # Parent cancel, then leave the room.
        await super().cancel(frame)
        await self._client.leave()

    async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
        """Send a transport message through the client.

        Args:
            frame: The transport message frame to send.
        """
        await self._client.send_message(frame)

    async def register_video_destination(self, destination: str):
        """Register a video output destination (not supported; logs a warning).

        Args:
            destination: The destination identifier to register.
        """
        logger.warning(f"{self} registering video destinations is not supported yet")

    async def register_audio_destination(self, destination: str):
        """Register an audio output destination on the client.

        Args:
            destination: The destination identifier to register.
        """
        await self._client.register_audio_destination(destination)

    async def write_audio_frame(self, frame: OutputAudioRawFrame):
        """Write an audio frame to the Daily call.

        Args:
            frame: The audio frame to write.
        """
        await self._client.write_audio_frame(frame)

    async def write_video_frame(self, frame: OutputImageRawFrame):
        """Write a video frame to the Daily call.

        Args:
            frame: The video frame to write.
        """
        await self._client.write_video_frame(frame)
+ + Args: + destination: The destination identifier to register. + """ + logger.warning(f"{self} registering video destinations is not supported yet") + + async def register_audio_destination(self, destination: str): + """Register an audio output destination. + + Args: + destination: The destination identifier to register. + """ + await self._client.register_audio_destination(destination) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the Daily call. + + Args: + frame: The audio frame to write. + """ + await self._client.write_audio_frame(frame) + + async def write_video_frame(self, frame: OutputImageRawFrame): + """Write a video frame to the Daily call. + + Args: + frame: The video frame to write. + """ + await self._client.write_video_frame(frame) + + +class DailyTransport(BaseTransport): + """Transport implementation for Daily audio and video calls. + + Provides comprehensive Daily integration including audio/video streaming, + transcription, recording, dial-in/out functionality, and real-time communication + features for conversational AI applications. + """ + + def __init__( + self, + room_url: str, + token: Optional[str], + bot_name: str, + params: Optional[DailyParams] = None, + input_name: Optional[str] = None, + output_name: Optional[str] = None, + ): + """Initialize the Daily transport. + + Args: + room_url: URL of the Daily room to connect to. + token: Optional authentication token for the room. + bot_name: Display name for the bot in the call. + params: Configuration parameters for the transport. + input_name: Optional name for the input transport. + output_name: Optional name for the output transport. 
+ """ + super().__init__(input_name=input_name, output_name=output_name) + + callbacks = DailyCallbacks( + on_active_speaker_changed=self._on_active_speaker_changed, + on_joined=self._on_joined, + on_left=self._on_left, + on_error=self._on_error, + on_app_message=self._on_app_message, + on_call_state_updated=self._on_call_state_updated, + on_client_connected=self._on_client_connected, + on_client_disconnected=self._on_client_disconnected, + on_dialin_connected=self._on_dialin_connected, + on_dialin_ready=self._on_dialin_ready, + on_dialin_stopped=self._on_dialin_stopped, + on_dialin_error=self._on_dialin_error, + on_dialin_warning=self._on_dialin_warning, + on_dialout_answered=self._on_dialout_answered, + on_dialout_connected=self._on_dialout_connected, + on_dialout_stopped=self._on_dialout_stopped, + on_dialout_error=self._on_dialout_error, + on_dialout_warning=self._on_dialout_warning, + on_participant_joined=self._on_participant_joined, + on_participant_left=self._on_participant_left, + on_participant_updated=self._on_participant_updated, + on_transcription_message=self._on_transcription_message, + on_transcription_stopped=self._on_transcription_stopped, + on_transcription_error=self._on_transcription_error, + on_recording_started=self._on_recording_started, + on_recording_stopped=self._on_recording_stopped, + on_recording_error=self._on_recording_error, + ) + self._params = params or DailyParams() + + self._client = DailyTransportClient( + room_url, token, bot_name, self._params, callbacks, self.name + ) + self._input: Optional[DailyInputTransport] = None + self._output: Optional[DailyOutputTransport] = None + + self._other_participant_has_joined = False + + # Register supported handlers. The user will only be able to register + # these handlers. 
+ self._register_event_handler("on_active_speaker_changed") + self._register_event_handler("on_joined") + self._register_event_handler("on_left") + self._register_event_handler("on_error") + self._register_event_handler("on_app_message") + self._register_event_handler("on_call_state_updated") + self._register_event_handler("on_client_connected") + self._register_event_handler("on_client_disconnected") + self._register_event_handler("on_dialin_connected") + self._register_event_handler("on_dialin_ready") + self._register_event_handler("on_dialin_stopped") + self._register_event_handler("on_dialin_error") + self._register_event_handler("on_dialin_warning") + self._register_event_handler("on_dialout_answered") + self._register_event_handler("on_dialout_connected") + self._register_event_handler("on_dialout_stopped") + self._register_event_handler("on_dialout_error") + self._register_event_handler("on_dialout_warning") + self._register_event_handler("on_first_participant_joined") + self._register_event_handler("on_participant_joined") + self._register_event_handler("on_participant_left") + self._register_event_handler("on_participant_updated") + self._register_event_handler("on_transcription_message") + self._register_event_handler("on_recording_started") + self._register_event_handler("on_recording_stopped") + self._register_event_handler("on_recording_error") + + # + # BaseTransport + # + + def input(self) -> DailyInputTransport: + """Get the input transport for receiving media and events. + + Returns: + The Daily input transport instance. + """ + if not self._input: + self._input = DailyInputTransport( + self, self._client, self._params, name=self._input_name + ) + return self._input + + def output(self) -> DailyOutputTransport: + """Get the output transport for sending media and events. + + Returns: + The Daily output transport instance. 
@property
def room_url(self) -> str:
    """Daily room URL, as reported by the underlying client.

    Returns:
        The room URL this transport is connected to.
    """
    client = self._client
    return client.room_url


@property
def participant_id(self) -> str:
    """Participant ID of this transport, as reported by the underlying client.

    Returns:
        The participant ID assigned by Daily.
    """
    client = self._client
    return client.participant_id


def set_log_level(self, level: DailyLogLevel):
    """Set the logging level for Daily's internal logging system.

    Args:
        level: The log level to set. Should be a member of the DailyLogLevel enum,
            such as DailyLogLevel.Info, DailyLogLevel.Debug, etc.

    Example:
        transport.set_log_level(DailyLogLevel.Info)
    """
    Daily.set_log_level(level)


async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
    """Queue an image frame on the output transport, heading downstream.

    Nothing happens when the output transport has not been created yet.

    Args:
        frame: The image frame to send.
    """
    sink = self._output
    if not sink:
        return
    await sink.queue_frame(frame, FrameDirection.DOWNSTREAM)


async def send_audio(self, frame: OutputAudioRawFrame):
    """Queue an audio frame on the output transport, heading downstream.

    Nothing happens when the output transport has not been created yet.

    Args:
        frame: The audio frame to send.
    """
    sink = self._output
    if not sink:
        return
    await sink.queue_frame(frame, FrameDirection.DOWNSTREAM)


def participants(self):
    """Get current participants in the room.

    Returns:
        Whatever the client's ``participants()`` returns (documented upstream
        as a dictionary keyed by participant ID).
    """
    client = self._client
    return client.participants()


def participant_counts(self):
    """Get participant count information.

    Returns:
        Whatever the client's ``participant_counts()`` returns (documented
        upstream as a dictionary with participant count details).
    """
    client = self._client
    return client.participant_counts()


async def start_dialout(self, settings=None):
    """Start a dial-out call to a phone number.

    Args:
        settings: Dial-out configuration settings, forwarded to the client as-is.
    """
    client = self._client
    await client.start_dialout(settings)
async def stop_dialout(self, participant_id):
    """Stop a dial-out call for a specific participant.

    Args:
        participant_id: ID of the participant to stop dial-out for.
    """
    client = self._client
    await client.stop_dialout(participant_id)


async def sip_call_transfer(self, settings):
    """Transfer a SIP call to another destination.

    Args:
        settings: SIP call transfer settings, forwarded to the client as-is.
    """
    client = self._client
    await client.sip_call_transfer(settings)


async def sip_refer(self, settings):
    """Send a SIP REFER request.

    Args:
        settings: SIP REFER settings, forwarded to the client as-is.
    """
    client = self._client
    await client.sip_refer(settings)


async def start_recording(self, streaming_settings=None, stream_id=None, force_new=None):
    """Start recording the call.

    The three arguments are forwarded positionally to the client.

    Args:
        streaming_settings: Recording configuration settings.
        stream_id: Unique identifier for the recording stream.
        force_new: Whether to force a new recording session.
    """
    client = self._client
    await client.start_recording(streaming_settings, stream_id, force_new)


async def stop_recording(self, stream_id=None):
    """Stop recording the call.

    Args:
        stream_id: Unique identifier for the recording stream to stop.
    """
    client = self._client
    await client.stop_recording(stream_id)


async def start_transcription(self, settings=None):
    """Start transcription for the call.

    Args:
        settings: Transcription configuration settings.
    """
    client = self._client
    await client.start_transcription(settings)


async def stop_transcription(self):
    """Stop transcription for the call."""
    client = self._client
    await client.stop_transcription()


async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None):
    """Send a chat message to Daily's Prebuilt main room.

    Args:
        message: The chat message to send.
        user_name: Optional user name that will appear as sender of the message.
    """
    client = self._client
    await client.send_prebuilt_chat_message(message, user_name)
async def capture_participant_transcription(self, participant_id: str):
    """Enable transcription capture for a specific participant.

    Args:
        participant_id: ID of the participant to capture transcription for.
    """
    client = self._client
    await client.capture_participant_transcription(participant_id)


async def capture_participant_audio(
    self,
    participant_id: str,
    audio_source: str = "microphone",
    sample_rate: int = 16000,
):
    """Capture audio from a specific participant.

    The request is delegated to the input transport; nothing happens when the
    input transport has not been created yet.

    Args:
        participant_id: ID of the participant to capture audio from.
        audio_source: Audio source to capture from.
        sample_rate: Desired sample rate for audio capture.
    """
    source = self._input
    if not source:
        return
    await source.capture_participant_audio(participant_id, audio_source, sample_rate)


async def capture_participant_video(
    self,
    participant_id: str,
    framerate: int = 30,
    video_source: str = "camera",
    color_format: str = "RGB",
):
    """Capture video from a specific participant.

    The request is delegated to the input transport; nothing happens when the
    input transport has not been created yet.

    Args:
        participant_id: ID of the participant to capture video from.
        framerate: Desired framerate for video capture.
        video_source: Video source to capture from.
        color_format: Color format for video frames.
    """
    source = self._input
    if not source:
        return
    await source.capture_participant_video(participant_id, framerate, video_source, color_format)


async def update_publishing(self, publishing_settings: Mapping[str, Any]):
    """Update media publishing settings.

    Args:
        publishing_settings: Publishing configuration settings.
    """
    client = self._client
    await client.update_publishing(publishing_settings=publishing_settings)


async def update_subscriptions(self, participant_settings=None, profile_settings=None):
    """Update media subscription settings.

    Args:
        participant_settings: Per-participant subscription settings.
        profile_settings: Global subscription profile settings.
    """
    client = self._client
    await client.update_subscriptions(
        participant_settings=participant_settings,
        profile_settings=profile_settings,
    )
+ """ + await self._client.update_subscriptions( + participant_settings=participant_settings, profile_settings=profile_settings + ) + + async def update_remote_participants(self, remote_participants: Mapping[str, Any]): + """Update settings for remote participants. + + Args: + remote_participants: Remote participant configuration settings. + """ + await self._client.update_remote_participants(remote_participants=remote_participants) + + async def _on_active_speaker_changed(self, participant: Any): + """Handle active speaker change events.""" + await self._call_event_handler("on_active_speaker_changed", participant) + + async def _on_joined(self, data): + """Handle room joined events.""" + await self._call_event_handler("on_joined", data) + + async def _on_left(self): + """Handle room left events.""" + await self._call_event_handler("on_left") + + async def _on_error(self, error): + """Handle error events and push error frames.""" + await self._call_event_handler("on_error", error) + # Push error frame to notify the pipeline + error_frame = ErrorFrame(error) + + if self._input: + await self._input.push_error(error_frame) + elif self._output: + await self._output.push_error(error_frame) + else: + logger.error("Both input and output are None while trying to push error") + raise Exception("No valid input or output channel to push error") + + async def _on_app_message(self, message: Any, sender: str): + """Handle application message events.""" + if self._input: + await self._input.push_app_message(message, sender) + await self._call_event_handler("on_app_message", message, sender) + + async def _on_call_state_updated(self, state: str): + """Handle call state update events.""" + await self._call_event_handler("on_call_state_updated", state) + + async def _on_client_connected(self, participant: Any): + """Handle client connected events.""" + await self._call_event_handler("on_client_connected", participant) + + async def _on_client_disconnected(self, participant: Any): + 
"""Handle client disconnected events.""" + await self._call_event_handler("on_client_disconnected", participant) + + async def _handle_dialin_ready(self, sip_endpoint: str): + """Handle dial-in ready events by updating SIP configuration.""" + if not self._params.dialin_settings: + return + + async with aiohttp.ClientSession() as session: + headers = { + "Authorization": f"Bearer {self._params.api_key}", + "Content-Type": "application/json", + } + data = { + "callId": self._params.dialin_settings.call_id, + "callDomain": self._params.dialin_settings.call_domain, + "sipUri": sip_endpoint, + } + + url = f"{self._params.api_url}/dialin/pinlessCallUpdate" + + try: + async with session.post( + url, headers=headers, json=data, timeout=aiohttp.ClientTimeout(total=10) + ) as r: + if r.status != 200: + text = await r.text() + logger.error( + f"Unable to handle dialin-ready event (status: {r.status}, error: {text})" + ) + return + + logger.debug("Event dialin-ready was handled successfully") + except asyncio.TimeoutError: + logger.error(f"Timeout handling dialin-ready event ({url})") + except Exception as e: + logger.exception(f"Error handling dialin-ready event ({url}): {e}") + + async def _on_dialin_connected(self, data): + """Handle dial-in connected events.""" + await self._call_event_handler("on_dialin_connected", data) + + async def _on_dialin_ready(self, sip_endpoint): + """Handle dial-in ready events.""" + if self._params.dialin_settings: + await self._handle_dialin_ready(sip_endpoint) + await self._call_event_handler("on_dialin_ready", sip_endpoint) + + async def _on_dialin_stopped(self, data): + """Handle dial-in stopped events.""" + await self._call_event_handler("on_dialin_stopped", data) + + async def _on_dialin_error(self, data): + """Handle dial-in error events.""" + await self._call_event_handler("on_dialin_error", data) + + async def _on_dialin_warning(self, data): + """Handle dial-in warning events.""" + await self._call_event_handler("on_dialin_warning", 
data) + + async def _on_dialout_answered(self, data): + """Handle dial-out answered events.""" + await self._call_event_handler("on_dialout_answered", data) + + async def _on_dialout_connected(self, data): + """Handle dial-out connected events.""" + await self._call_event_handler("on_dialout_connected", data) + + async def _on_dialout_stopped(self, data): + """Handle dial-out stopped events.""" + await self._call_event_handler("on_dialout_stopped", data) + + async def _on_dialout_error(self, data): + """Handle dial-out error events.""" + await self._call_event_handler("on_dialout_error", data) + + async def _on_dialout_warning(self, data): + """Handle dial-out warning events.""" + await self._call_event_handler("on_dialout_warning", data) + + async def _on_participant_joined(self, participant): + """Handle participant joined events.""" + id = participant["id"] + logger.info(f"Participant joined {id}") + + if self._input and self._params.audio_in_enabled and self._params.audio_in_user_tracks: + await self._input.capture_participant_audio( + id, "microphone", self._client.in_sample_rate + ) + + if not self._other_participant_has_joined: + self._other_participant_has_joined = True + await self._call_event_handler("on_first_participant_joined", participant) + + await self._call_event_handler("on_participant_joined", participant) + # Also call on_client_connected for compatibility with other transports + await self._call_event_handler("on_client_connected", participant) + + async def _on_participant_left(self, participant, reason): + """Handle participant left events.""" + id = participant["id"] + logger.info(f"Participant left {id}") + await self._call_event_handler("on_participant_left", participant, reason) + # Also call on_client_disconnected for compatibility with other transports + await self._call_event_handler("on_client_disconnected", participant) + + async def _on_participant_updated(self, participant): + """Handle participant updated events.""" + await 
self._call_event_handler("on_participant_updated", participant) + + async def _on_transcription_message(self, message): + """Handle transcription message events.""" + await self._call_event_handler("on_transcription_message", message) + + participant_id = "" + if "participantId" in message: + participant_id = message["participantId"] + if not participant_id: + return + + text = message["text"] + timestamp = message["timestamp"] + is_final = message["rawResponse"]["is_final"] + try: + language = message["rawResponse"]["channel"]["alternatives"][0]["languages"][0] + language = Language(language) + except KeyError: + language = None + if is_final: + frame = TranscriptionFrame(text, participant_id, timestamp, language, result=message) + logger.debug(f"Transcription (from: {participant_id}): [{text}]") + else: + frame = InterimTranscriptionFrame( + text, + participant_id, + timestamp, + language, + result=message, + ) + + if self._input: + await self._input.push_transcription_frame(frame) + + async def _on_transcription_stopped(self, stopped_by, stopped_by_error): + """Handle transcription stopped events.""" + await self._call_event_handler("on_transcription_stopped", stopped_by, stopped_by_error) + + async def _on_transcription_error(self, message): + """Handle transcription error events.""" + await self._call_event_handler("on_transcription_error", message) + + async def _on_recording_started(self, status): + """Handle recording started events.""" + await self._call_event_handler("on_recording_started", status) + + async def _on_recording_stopped(self, stream_id): + """Handle recording stopped events.""" + await self._call_event_handler("on_recording_stopped", stream_id) + + async def _on_recording_error(self, stream_id, message): + """Handle recording error events.""" + await self._call_event_handler("on_recording_error", stream_id, message) diff --git a/src/pipecat/transports/daily/utils.py b/src/pipecat/transports/daily/utils.py new file mode 100644 index 
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Daily REST Helpers.

Methods that wrap the Daily API to create rooms, check room URLs, and get meeting tokens.
"""

import time
from typing import Dict, List, Literal, Optional
from urllib.parse import urlparse

import aiohttp
from pydantic import BaseModel, Field, ValidationError


class DailyRoomSipParams(BaseModel):
    """SIP configuration parameters for Daily rooms.

    Every field has a default, so the model can be built with no arguments.

    Parameters:
        display_name: Name shown for the SIP endpoint.
        video: Whether video is enabled for SIP.
        sip_mode: SIP connection mode, typically 'dial-in'.
        num_endpoints: Number of allowed SIP endpoints.
        codecs: Codecs to support for audio and video. If None, uses Daily defaults.
            Example: {"audio": ["OPUS"], "video": ["H264"]}
    """

    display_name: str = "sw-sip-dialin"
    video: bool = False
    sip_mode: str = "dial-in"
    num_endpoints: int = 1
    # Maps a media kind to its list of codec names; None leaves the field unset.
    codecs: Optional[Dict[str, List[str]]] = None


class RecordingsBucketConfig(BaseModel):
    """Configuration for storing Daily recordings in a custom S3 bucket.

    The three bucket/role fields are required; only ``allow_api_access`` has a
    default.

    Refer to the Daily API documentation for more information:
    https://docs.daily.co/guides/products/live-streaming-recording/storing-recordings-in-a-custom-s3-bucket

    Parameters:
        bucket_name: Name of the S3 bucket for storing recordings.
        bucket_region: AWS region where the S3 bucket is located.
        assume_role_arn: ARN of the IAM role to assume for S3 access.
        allow_api_access: Whether to allow API access to the recordings.
    """

    bucket_name: str
    bucket_region: str
    assume_role_arn: str
    allow_api_access: bool = False


class TranscriptionBucketConfig(BaseModel):
    """Configuration for storing Daily transcription in a custom S3 bucket.

    Declares the same four fields as ``RecordingsBucketConfig``; the three
    bucket/role fields are required and only ``allow_api_access`` has a default.

    Refer to the Daily API documentation for more information:
    https://docs.daily.co/guides/products/live-streaming-recording/storing-recordings-in-a-custom-s3-bucket

    Parameters:
        bucket_name: Name of the S3 bucket for storing transcription.
        bucket_region: AWS region where the S3 bucket is located.
        assume_role_arn: ARN of the IAM role to assume for S3 access.
        allow_api_access: Whether to allow API access to the transcription.
    """

    bucket_name: str
    bucket_region: str
    assume_role_arn: str
    allow_api_access: bool = False
class DailyRoomProperties(BaseModel, extra="allow"):
    """Properties for configuring a Daily room.

    Extra, undeclared fields are accepted (``extra="allow"``), so room
    properties not modelled here can still be passed through.

    Reference: https://docs.daily.co/reference/rest-api/rooms/create-room#properties

    Parameters:
        exp: Optional Unix epoch timestamp for room expiration (e.g., time.time() + 300 for 5 minutes).
        enable_chat: Whether chat is enabled in the room.
        enable_prejoin_ui: Whether the pre-join UI is enabled.
        enable_emoji_reactions: Whether emoji reactions are enabled.
        eject_at_room_exp: Whether to remove participants when room expires.
        enable_dialout: Whether SIP dial-out is enabled.
        enable_recording: Recording settings ('cloud', 'local', 'raw-tracks').
        enable_transcription_storage: Whether transcription storage is enabled.
        geo: Geographic region for room.
        max_participants: Maximum number of participants allowed in the room.
        recordings_bucket: Configuration for custom S3 bucket recordings.
        transcription_bucket: Configuration for custom S3 bucket transcription.
        sip: SIP configuration parameters.
        sip_uri: SIP URI information returned by Daily.
        start_video_off: Whether video is off by default.
    """

    exp: Optional[float] = None
    enable_chat: bool = False
    enable_prejoin_ui: bool = False
    enable_emoji_reactions: bool = False
    eject_at_room_exp: bool = False
    enable_dialout: Optional[bool] = None
    enable_recording: Optional[Literal["cloud", "local", "raw-tracks"]] = None
    enable_transcription_storage: Optional[bool] = None
    geo: Optional[str] = None
    max_participants: Optional[int] = None
    recordings_bucket: Optional[RecordingsBucketConfig] = None
    transcription_bucket: Optional[TranscriptionBucketConfig] = None
    sip: Optional[DailyRoomSipParams] = None
    sip_uri: Optional[dict] = None
    start_video_off: bool = False

    @property
    def sip_endpoint(self) -> str:
        """Get the SIP endpoint URI if available.

        Returns:
            ``"sip:<endpoint>"`` built from ``sip_uri["endpoint"]``, or an
            empty string when ``sip_uri`` is unset or carries no endpoint.
        """
        # `sip_uri` may be None or lack the "endpoint" key; both cases mean
        # "not available" and yield "" instead of raising KeyError.
        endpoint = (self.sip_uri or {}).get("endpoint")
        return f"sip:{endpoint}" if endpoint else ""


class DailyRoomParams(BaseModel):
    """Parameters for creating a Daily room.

    Parameters:
        name: Optional custom name for the room.
        privacy: Room privacy setting ('private' or 'public').
        properties: Room configuration properties.
    """

    name: Optional[str] = None
    privacy: Literal["private", "public"] = "public"
    # default_factory gives every instance its own properties object.
    properties: DailyRoomProperties = Field(default_factory=DailyRoomProperties)


class DailyRoomObject(BaseModel):
    """Represents a Daily room returned by the API.

    All fields are required.

    Parameters:
        id: Unique room identifier.
        name: Room name.
        api_created: Whether room was created via API.
        privacy: Room privacy setting ('private' or 'public').
        url: Full URL for joining the room.
        created_at: Timestamp of room creation in ISO 8601 format (e.g., "2019-01-26T09:01:22.000Z").
        config: Room configuration properties.
    """

    id: str
    name: str
    api_created: bool
    privacy: str
    url: str
    created_at: str
    config: DailyRoomProperties


class DailyMeetingTokenProperties(BaseModel):
    """Properties for configuring a Daily meeting token.

    Every field defaults to None, so only the properties that are explicitly
    set need to be sent to the API.

    Refer to the Daily API documentation for more information:
    https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#properties

    Parameters:
        room_name: The room for which this token is valid. If not set, the token is valid for all rooms in your domain.
        eject_at_token_exp: If True, the user will be ejected from the room when the token expires.
        eject_after_elapsed: The number of seconds after which the user will be ejected from the room.
        nbf: Not before timestamp - users cannot join with this token before this time.
        exp: Expiration time (unix timestamp in seconds). Strongly recommended for security.
        is_owner: If True, the token will grant owner privileges in the room.
        user_name: The name of the user. This will be added to the token payload.
        user_id: A unique identifier for the user. This will be added to the token payload.
        enable_screenshare: If True, the user will be able to share their screen.
        start_video_off: If True, the user's video will be turned off when they join the room.
        start_audio_off: If True, the user's audio will be turned off when they join the room.
        enable_recording: Recording settings for the token. Must be one of 'cloud', 'local' or 'raw-tracks'.
        enable_prejoin_ui: If True, the user will see the prejoin UI before joining the room.
        start_cloud_recording: Start cloud recording when the user joins the room.
        permissions: Specifies the initial default permissions for a non-meeting-owner participant.
    """

    room_name: Optional[str] = None
    eject_at_token_exp: Optional[bool] = None
    eject_after_elapsed: Optional[int] = None
    nbf: Optional[int] = None
    exp: Optional[int] = None
    is_owner: Optional[bool] = None
    user_name: Optional[str] = None
    user_id: Optional[str] = None
    enable_screenshare: Optional[bool] = None
    start_video_off: Optional[bool] = None
    start_audio_off: Optional[bool] = None
    enable_recording: Optional[Literal["cloud", "local", "raw-tracks"]] = None
    enable_prejoin_ui: Optional[bool] = None
    start_cloud_recording: Optional[bool] = None
    permissions: Optional[dict] = None


class DailyMeetingTokenParams(BaseModel):
    """Parameters for creating a Daily meeting token.

    Refer to the Daily API documentation for more information:
    https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#body-params

    Parameters:
        properties: Meeting token configuration properties.
    """

    # default_factory gives every instance its own properties object.
    properties: DailyMeetingTokenProperties = Field(default_factory=DailyMeetingTokenProperties)
+ """ + + room_name: Optional[str] = None + eject_at_token_exp: Optional[bool] = None + eject_after_elapsed: Optional[int] = None + nbf: Optional[int] = None + exp: Optional[int] = None + is_owner: Optional[bool] = None + user_name: Optional[str] = None + user_id: Optional[str] = None + enable_screenshare: Optional[bool] = None + start_video_off: Optional[bool] = None + start_audio_off: Optional[bool] = None + enable_recording: Optional[Literal["cloud", "local", "raw-tracks"]] = None + enable_prejoin_ui: Optional[bool] = None + start_cloud_recording: Optional[bool] = None + permissions: Optional[dict] = None + + +class DailyMeetingTokenParams(BaseModel): + """Parameters for creating a Daily meeting token. + + Refer to the Daily API documentation for more information: + https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#body-params + + Parameters: + properties: Meeting token configuration properties. + """ + + properties: DailyMeetingTokenProperties = Field(default_factory=DailyMeetingTokenProperties) + + +class DailyRESTHelper: + """Helper class for interacting with Daily's REST API. + + Provides methods for creating, managing, and accessing Daily rooms. + """ + + def __init__( + self, + *, + daily_api_key: str, + daily_api_url: str = "https://api.daily.co/v1", + aiohttp_session: aiohttp.ClientSession, + ): + """Initialize the Daily REST helper. + + Args: + daily_api_key: Your Daily API key. + daily_api_url: Daily API base URL (e.g. "https://api.daily.co/v1"). + aiohttp_session: Async HTTP session for making requests. + """ + self.daily_api_key = daily_api_key + self.daily_api_url = daily_api_url + self.aiohttp_session = aiohttp_session + + def get_name_from_url(self, room_url: str) -> str: + """Extract room name from a Daily room URL. + + Args: + room_url: Full Daily room URL. + + Returns: + Room name portion of the URL. 
class DailyRESTHelper:
    """Helper class for interacting with Daily's REST API.

    Provides methods for creating, looking up and deleting Daily rooms and for
    generating meeting tokens. Every request goes through the caller-supplied
    aiohttp session and is authenticated with the API key as a bearer token.
    """

    def __init__(
        self,
        *,
        daily_api_key: str,
        daily_api_url: str = "https://api.daily.co/v1",
        aiohttp_session: aiohttp.ClientSession,
    ):
        """Initialize the Daily REST helper.

        Args:
            daily_api_key: Your Daily API key.
            daily_api_url: Daily API base URL (e.g. "https://api.daily.co/v1").
            aiohttp_session: Async HTTP session for making requests.
        """
        self.daily_api_key = daily_api_key
        self.daily_api_url = daily_api_url
        self.aiohttp_session = aiohttp_session

    def _auth_headers(self) -> Dict[str, str]:
        """Build the bearer-token authorization header sent with every request."""
        return {"Authorization": f"Bearer {self.daily_api_key}"}

    def get_name_from_url(self, room_url: str) -> str:
        """Extract room name from a Daily room URL.

        Args:
            room_url: Full Daily room URL.

        Returns:
            The URL path without its leading or trailing slashes, so
            ``https://example.daily.co/room/`` yields ``room`` rather than
            ``room/``.
        """
        return urlparse(room_url).path.strip("/")

    async def get_room_from_url(self, room_url: str) -> DailyRoomObject:
        """Get room details from a Daily room URL.

        Args:
            room_url: Full Daily room URL.

        Returns:
            DailyRoomObject instance for the room.

        Raises:
            Exception: If the room cannot be fetched or the response is invalid.
        """
        room_name = self.get_name_from_url(room_url)
        return await self._get_room_from_name(room_name)

    async def create_room(self, params: DailyRoomParams) -> DailyRoomObject:
        """Create a new Daily room.

        Args:
            params: Room configuration parameters. Fields set to None are
                omitted from the request body.

        Returns:
            DailyRoomObject instance for the created room.

        Raises:
            Exception: If room creation fails or response is invalid.
        """
        payload = params.model_dump(exclude_none=True)
        async with self.aiohttp_session.post(
            f"{self.daily_api_url}/rooms", headers=self._auth_headers(), json=payload
        ) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Unable to create room (status: {r.status}): {text}")

            data = await r.json()

        try:
            return DailyRoomObject(**data)
        except ValidationError as e:
            raise Exception(f"Invalid response: {e}") from e

    async def get_token(
        self,
        room_url: str,
        expiry_time: float = 60 * 60,
        eject_at_token_exp: bool = False,
        owner: bool = True,
        params: Optional[DailyMeetingTokenParams] = None,
    ) -> str:
        """Generate a meeting token for user to join a Daily room.

        Args:
            room_url: Daily room URL.
            expiry_time: Token validity duration in seconds (default: 1 hour).
            eject_at_token_exp: Whether to eject user when token expires.
            owner: Whether token has owner privileges.
            params: Optional additional token properties. Note that room_name,
                exp, eject_at_token_exp and is_owner will be set based on the
                other function parameters regardless of values in params. The
                object passed in is not modified.

        Returns:
            Meeting token.

        Raises:
            Exception: If token generation fails or room URL is missing.
        """
        if not room_url:
            raise Exception(
                "No Daily room specified. You must specify a Daily room in order a token to be generated."
            )

        expiration: int = int(time.time() + expiry_time)
        room_name = self.get_name_from_url(room_url)

        if params is None:
            params = DailyMeetingTokenParams(
                properties=DailyMeetingTokenProperties(
                    room_name=room_name,
                    is_owner=owner,
                    exp=expiration,
                    eject_at_token_exp=eject_at_token_exp,
                )
            )
        else:
            # Work on a deep copy so the overrides below do not leak into the
            # caller's object (which may be reused for other rooms or tokens).
            params = params.model_copy(deep=True)
            params.properties.room_name = room_name
            params.properties.exp = expiration
            params.properties.eject_at_token_exp = eject_at_token_exp
            params.properties.is_owner = owner

        payload = params.model_dump(exclude_none=True)

        async with self.aiohttp_session.post(
            f"{self.daily_api_url}/meeting-tokens", headers=self._auth_headers(), json=payload
        ) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Failed to create meeting token (status: {r.status}): {text}")

            data = await r.json()

        return data["token"]

    async def delete_room_by_url(self, room_url: str) -> bool:
        """Delete a room using its URL.

        Args:
            room_url: Daily room URL.

        Returns:
            True if deletion was successful.

        Raises:
            Exception: If deletion fails (excluding 404 Not Found).
        """
        room_name = self.get_name_from_url(room_url)
        return await self.delete_room_by_name(room_name)

    async def delete_room_by_name(self, room_name: str) -> bool:
        """Delete a room using its name.

        A 404 response is treated as success: the room is already gone.

        Args:
            room_name: Name of the room to delete.

        Returns:
            True if deletion was successful.

        Raises:
            Exception: If deletion fails (excluding 404 Not Found).
        """
        async with self.aiohttp_session.delete(
            f"{self.daily_api_url}/rooms/{room_name}", headers=self._auth_headers()
        ) as r:
            if r.status != 200 and r.status != 404:
                text = await r.text()
                raise Exception(f"Failed to delete room [{room_name}] (status: {r.status}): {text}")

        return True

    async def _get_room_from_name(self, room_name: str) -> DailyRoomObject:
        """Internal method to get room details by name.

        Args:
            room_name: Name of the room to look up.

        Returns:
            DailyRoomObject instance for the room.

        Raises:
            Exception: If the room does not exist (404), the request fails
                with any other non-200 status, or the response is invalid.
        """
        async with self.aiohttp_session.get(
            f"{self.daily_api_url}/rooms/{room_name}", headers=self._auth_headers()
        ) as r:
            if r.status == 404:
                raise Exception(f"Room not found: {room_name}")
            if r.status != 200:
                # Auth failures, rate limits and server errors are not
                # "room not found"; report what actually happened.
                text = await r.text()
                raise Exception(f"Unable to get room [{room_name}] (status: {r.status}): {text}")

            data = await r.json()

        try:
            return DailyRoomObject(**data)
        except ValidationError as e:
            raise Exception(f"Invalid response: {e}") from e
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""LiveKit transport implementation for Pipecat.

This module provides comprehensive LiveKit real-time communication integration
including audio streaming, data messaging, participant management, and room
event handling for conversational AI applications.
"""

import asyncio
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, List, Optional

from loguru import logger
from pydantic import BaseModel

from pipecat.audio.utils import create_stream_resampler
from pipecat.audio.vad.vad_analyzer import VADAnalyzer
from pipecat.frames.frames import (
    AudioRawFrame,
    CancelFrame,
    EndFrame,
    OutputAudioRawFrame,
    StartFrame,
    TransportMessageFrame,
    TransportMessageUrgentFrame,
    UserAudioRawFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
from pipecat.transports.base_input import BaseInputTransport
from pipecat.transports.base_output import BaseOutputTransport
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.utils.asyncio.task_manager import BaseTaskManager

# The LiveKit SDK and tenacity are optional dependencies: log an install hint
# and re-raise as a generic Exception when either one is missing.
try:
    from livekit import rtc
    from tenacity import retry, stop_after_attempt, wait_exponential
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use LiveKit, you need to `pip install pipecat-ai[livekit]`.")
    raise Exception(f"Missing module: {e}")


@dataclass
class LiveKitTransportMessageFrame(TransportMessageFrame):
    """Frame for transport messages in LiveKit rooms.

    Extends ``TransportMessageFrame`` with one optional field.

    Parameters:
        participant_id: Optional ID of the participant this message is for/from.
    """

    participant_id: Optional[str] = None


@dataclass
class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
    """Frame for urgent transport messages in LiveKit rooms.

    Extends ``TransportMessageUrgentFrame`` with one optional field.

    Parameters:
        participant_id: Optional ID of the participant this message is for/from.
    """

    participant_id: Optional[str] = None


class LiveKitParams(TransportParams):
    """Configuration parameters for LiveKit transport.

    Inherits all parameters from TransportParams without additional configuration.
    """

    pass


class LiveKitCallbacks(BaseModel):
    """Callback handlers for LiveKit events.

    Every callback is a required field and must be an async callable. The
    connection callbacks take no arguments, ``on_data_received`` takes
    ``(bytes, str)`` and the remaining callbacks take a single ``str``.

    Parameters:
        on_connected: Called when connected to the LiveKit room.
        on_disconnected: Called when disconnected from the LiveKit room.
        on_participant_connected: Called when a participant joins the room.
        on_participant_disconnected: Called when a participant leaves the room.
        on_audio_track_subscribed: Called when an audio track is subscribed.
        on_audio_track_unsubscribed: Called when an audio track is unsubscribed.
        on_data_received: Called when data is received from a participant.
        on_first_participant_joined: Called when the first participant joins.
    """

    on_connected: Callable[[], Awaitable[None]]
    on_disconnected: Callable[[], Awaitable[None]]
    on_participant_connected: Callable[[str], Awaitable[None]]
    on_participant_disconnected: Callable[[str], Awaitable[None]]
    on_audio_track_subscribed: Callable[[str], Awaitable[None]]
    on_audio_track_unsubscribed: Callable[[str], Awaitable[None]]
    on_data_received: Callable[[bytes, str], Awaitable[None]]
    on_first_participant_joined: Callable[[str], Awaitable[None]]
+ + Parameters: + on_connected: Called when connected to the LiveKit room. + on_disconnected: Called when disconnected from the LiveKit room. + on_participant_connected: Called when a participant joins the room. + on_participant_disconnected: Called when a participant leaves the room. + on_audio_track_subscribed: Called when an audio track is subscribed. + on_audio_track_unsubscribed: Called when an audio track is unsubscribed. + on_data_received: Called when data is received from a participant. + on_first_participant_joined: Called when the first participant joins. + """ + + on_connected: Callable[[], Awaitable[None]] + on_disconnected: Callable[[], Awaitable[None]] + on_participant_connected: Callable[[str], Awaitable[None]] + on_participant_disconnected: Callable[[str], Awaitable[None]] + on_audio_track_subscribed: Callable[[str], Awaitable[None]] + on_audio_track_unsubscribed: Callable[[str], Awaitable[None]] + on_data_received: Callable[[bytes, str], Awaitable[None]] + on_first_participant_joined: Callable[[str], Awaitable[None]] + + +class LiveKitTransportClient: + """Core client for interacting with LiveKit rooms. + + Manages the connection to LiveKit rooms and handles all low-level API interactions + including room management, audio streaming, data messaging, and event handling. + """ + + def __init__( + self, + url: str, + token: str, + room_name: str, + params: LiveKitParams, + callbacks: LiveKitCallbacks, + transport_name: str, + ): + """Initialize the LiveKit transport client. + + Args: + url: LiveKit server URL to connect to. + token: Authentication token for the room. + room_name: Name of the LiveKit room to join. + params: Configuration parameters for the transport. + callbacks: Event callback handlers. + transport_name: Name identifier for the transport. 
+ """ + self._url = url + self._token = token + self._room_name = room_name + self._params = params + self._callbacks = callbacks + self._transport_name = transport_name + self._room: Optional[rtc.Room] = None + self._participant_id: str = "" + self._connected = False + self._disconnect_counter = 0 + self._audio_source: Optional[rtc.AudioSource] = None + self._audio_track: Optional[rtc.LocalAudioTrack] = None + self._audio_tracks = {} + self._audio_queue = asyncio.Queue() + self._other_participant_has_joined = False + self._task_manager: Optional[BaseTaskManager] = None + + @property + def participant_id(self) -> str: + """Get the participant ID for this client. + + Returns: + The participant ID assigned by LiveKit. + """ + return self._participant_id + + @property + def room(self) -> rtc.Room: + """Get the LiveKit room instance. + + Returns: + The LiveKit room object. + + Raises: + Exception: If room object is not available. + """ + if not self._room: + raise Exception(f"{self}: missing room object (pipeline not started?)") + return self._room + + async def setup(self, setup: FrameProcessorSetup): + """Setup the client with task manager and room initialization. + + Args: + setup: The frame processor setup configuration. 
+ """ + if self._task_manager: + return + + self._task_manager = setup.task_manager + self._room = rtc.Room(loop=self._task_manager.get_event_loop()) + + # Set up room event handlers + self.room.on("participant_connected")(self._on_participant_connected_wrapper) + self.room.on("participant_disconnected")(self._on_participant_disconnected_wrapper) + self.room.on("track_subscribed")(self._on_track_subscribed_wrapper) + self.room.on("track_unsubscribed")(self._on_track_unsubscribed_wrapper) + self.room.on("data_received")(self._on_data_received_wrapper) + self.room.on("connected")(self._on_connected_wrapper) + self.room.on("disconnected")(self._on_disconnected_wrapper) + + async def cleanup(self): + """Cleanup client resources.""" + await self.disconnect() + + async def start(self, frame: StartFrame): + """Start the client and initialize audio components. + + Args: + frame: The start frame containing initialization parameters. + """ + self._out_sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate + + @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) + async def connect(self): + """Connect to the LiveKit room with retry logic.""" + if self._connected: + # Increment disconnect counter if already connected. + self._disconnect_counter += 1 + return + + logger.info(f"Connecting to {self._room_name}") + + try: + await self.room.connect( + self._url, + self._token, + options=rtc.RoomOptions(auto_subscribe=True), + ) + self._connected = True + # Increment disconnect counter if we successfully connected. 
+ self._disconnect_counter += 1 + + self._participant_id = self.room.local_participant.sid + logger.info(f"Connected to {self._room_name}") + + # Set up audio source and track + self._audio_source = rtc.AudioSource( + self._out_sample_rate, self._params.audio_out_channels + ) + self._audio_track = rtc.LocalAudioTrack.create_audio_track( + "pipecat-audio", self._audio_source + ) + options = rtc.TrackPublishOptions() + options.source = rtc.TrackSource.SOURCE_MICROPHONE + await self.room.local_participant.publish_track(self._audio_track, options) + + await self._callbacks.on_connected() + + # Check if there are already participants in the room + participants = self.get_participants() + if participants and not self._other_participant_has_joined: + self._other_participant_has_joined = True + await self._callbacks.on_first_participant_joined(participants[0]) + except Exception as e: + logger.error(f"Error connecting to {self._room_name}: {e}") + raise + + async def disconnect(self): + """Disconnect from the LiveKit room.""" + # Decrement leave counter when leaving. + self._disconnect_counter -= 1 + + if not self._connected or self._disconnect_counter > 0: + return + + logger.info(f"Disconnecting from {self._room_name}") + await self.room.disconnect() + self._connected = False + logger.info(f"Disconnected from {self._room_name}") + await self._callbacks.on_disconnected() + + async def send_data(self, data: bytes, participant_id: Optional[str] = None): + """Send data to participants in the room. + + Args: + data: The data bytes to send. + participant_id: Optional specific participant to send to. 
+ """ + if not self._connected: + return + + try: + if participant_id: + await self.room.local_participant.publish_data( + data, reliable=True, destination_identities=[participant_id] + ) + else: + await self.room.local_participant.publish_data(data, reliable=True) + except Exception as e: + logger.error(f"Error sending data: {e}") + + async def publish_audio(self, audio_frame: rtc.AudioFrame): + """Publish an audio frame to the room. + + Args: + audio_frame: The LiveKit audio frame to publish. + """ + if not self._connected or not self._audio_source: + return + + try: + await self._audio_source.capture_frame(audio_frame) + except Exception as e: + logger.error(f"Error publishing audio: {e}") + + def get_participants(self) -> List[str]: + """Get list of participant IDs in the room. + + Returns: + List of participant IDs. + """ + return [p.sid for p in self.room.remote_participants.values()] + + async def get_participant_metadata(self, participant_id: str) -> dict: + """Get metadata for a specific participant. + + Args: + participant_id: ID of the participant to get metadata for. + + Returns: + Dictionary containing participant metadata. + """ + participant = self.room.remote_participants.get(participant_id) + if participant: + return { + "id": participant.sid, + "name": participant.name, + "metadata": participant.metadata, + "is_speaking": participant.is_speaking, + } + return {} + + async def set_participant_metadata(self, metadata: str): + """Set metadata for the local participant. + + Args: + metadata: Metadata string to set. + """ + await self.room.local_participant.set_metadata(metadata) + + async def mute_participant(self, participant_id: str): + """Mute a specific participant's audio tracks. + + Args: + participant_id: ID of the participant to mute. 
+ """ + participant = self.room.remote_participants.get(participant_id) + if participant: + for track in participant.tracks.values(): + if track.kind == "audio": + await track.set_enabled(False) + + async def unmute_participant(self, participant_id: str): + """Unmute a specific participant's audio tracks. + + Args: + participant_id: ID of the participant to unmute. + """ + participant = self.room.remote_participants.get(participant_id) + if participant: + for track in participant.tracks.values(): + if track.kind == "audio": + await track.set_enabled(True) + + # Wrapper methods for event handlers + def _on_participant_connected_wrapper(self, participant: rtc.RemoteParticipant): + """Wrapper for participant connected events.""" + self._task_manager.create_task( + self._async_on_participant_connected(participant), + f"{self}::_async_on_participant_connected", + ) + + def _on_participant_disconnected_wrapper(self, participant: rtc.RemoteParticipant): + """Wrapper for participant disconnected events.""" + self._task_manager.create_task( + self._async_on_participant_disconnected(participant), + f"{self}::_async_on_participant_disconnected", + ) + + def _on_track_subscribed_wrapper( + self, + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): + """Wrapper for track subscribed events.""" + self._task_manager.create_task( + self._async_on_track_subscribed(track, publication, participant), + f"{self}::_async_on_track_subscribed", + ) + + def _on_track_unsubscribed_wrapper( + self, + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): + """Wrapper for track unsubscribed events.""" + self._task_manager.create_task( + self._async_on_track_unsubscribed(track, publication, participant), + f"{self}::_async_on_track_unsubscribed", + ) + + def _on_data_received_wrapper(self, data: rtc.DataPacket): + """Wrapper for data received events.""" + self._task_manager.create_task( + 
self._async_on_data_received(data), + f"{self}::_async_on_data_received", + ) + + def _on_connected_wrapper(self): + """Wrapper for connected events.""" + self._task_manager.create_task(self._async_on_connected(), f"{self}::_async_on_connected") + + def _on_disconnected_wrapper(self): + """Wrapper for disconnected events.""" + self._task_manager.create_task( + self._async_on_disconnected(), f"{self}::_async_on_disconnected" + ) + + # Async methods for event handling + async def _async_on_participant_connected(self, participant: rtc.RemoteParticipant): + """Handle participant connected events.""" + logger.info(f"Participant connected: {participant.identity}") + await self._callbacks.on_participant_connected(participant.sid) + if not self._other_participant_has_joined: + self._other_participant_has_joined = True + await self._callbacks.on_first_participant_joined(participant.sid) + + async def _async_on_participant_disconnected(self, participant: rtc.RemoteParticipant): + """Handle participant disconnected events.""" + logger.info(f"Participant disconnected: {participant.identity}") + await self._callbacks.on_participant_disconnected(participant.sid) + if len(self.get_participants()) == 0: + self._other_participant_has_joined = False + + async def _async_on_track_subscribed( + self, + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): + """Handle track subscribed events.""" + if track.kind == rtc.TrackKind.KIND_AUDIO: + logger.info(f"Audio track subscribed: {track.sid} from participant {participant.sid}") + self._audio_tracks[participant.sid] = track + audio_stream = rtc.AudioStream(track) + self._task_manager.create_task( + self._process_audio_stream(audio_stream, participant.sid), + f"{self}::_process_audio_stream", + ) + await self._callbacks.on_audio_track_subscribed(participant.sid) + + async def _async_on_track_unsubscribed( + self, + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: 
rtc.RemoteParticipant, + ): + """Handle track unsubscribed events.""" + logger.info(f"Track unsubscribed: {publication.sid} from {participant.identity}") + if track.kind == rtc.TrackKind.KIND_AUDIO: + await self._callbacks.on_audio_track_unsubscribed(participant.sid) + + async def _async_on_data_received(self, data: rtc.DataPacket): + """Handle data received events.""" + await self._callbacks.on_data_received(data.data, data.participant.sid) + + async def _async_on_connected(self): + """Handle connected events.""" + await self._callbacks.on_connected() + + async def _async_on_disconnected(self, reason=None): + """Handle disconnected events.""" + self._connected = False + logger.info(f"Disconnected from {self._room_name}. Reason: {reason}") + await self._callbacks.on_disconnected() + + async def _process_audio_stream(self, audio_stream: rtc.AudioStream, participant_id: str): + """Process incoming audio stream from a participant.""" + logger.info(f"Started processing audio stream for participant {participant_id}") + async for event in audio_stream: + if isinstance(event, rtc.AudioFrameEvent): + await self._audio_queue.put((event, participant_id)) + else: + logger.warning(f"Received unexpected event type: {type(event)}") + + async def get_next_audio_frame(self): + """Get the next audio frame from the queue.""" + while True: + frame, participant_id = await self._audio_queue.get() + yield frame, participant_id + + def __str__(self): + """String representation of the LiveKit transport client.""" + return f"{self._transport_name}::LiveKitTransportClient" + + +class LiveKitInputTransport(BaseInputTransport): + """Handles incoming media streams and events from LiveKit rooms. + + Processes incoming audio streams from room participants and forwards them + as Pipecat frames, including audio resampling and VAD integration. 
+ """ + + def __init__( + self, + transport: BaseTransport, + client: LiveKitTransportClient, + params: LiveKitParams, + **kwargs, + ): + """Initialize the LiveKit input transport. + + Args: + transport: The parent transport instance. + client: LiveKitTransportClient instance. + params: Configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + self._transport = transport + self._client = client + + self._audio_in_task = None + self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer + self._resampler = create_stream_resampler() + + # Whether we have seen a StartFrame already. + self._initialized = False + + @property + def vad_analyzer(self) -> Optional[VADAnalyzer]: + """Get the Voice Activity Detection analyzer. + + Returns: + The VAD analyzer instance if configured. + """ + return self._vad_analyzer + + async def start(self, frame: StartFrame): + """Start the input transport and connect to LiveKit room. + + Args: + frame: The start frame containing initialization parameters. + """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.start(frame) + await self._client.connect() + if not self._audio_in_task and self._params.audio_in_enabled: + self._audio_in_task = self.create_task(self._audio_in_task_handler()) + await self.set_transport_ready(frame) + logger.info("LiveKitInputTransport started") + + async def stop(self, frame: EndFrame): + """Stop the input transport and disconnect from LiveKit room. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._client.disconnect() + if self._audio_in_task: + await self.cancel_task(self._audio_in_task) + logger.info("LiveKitInputTransport stopped") + + async def cancel(self, frame: CancelFrame): + """Cancel the input transport and disconnect from LiveKit room. + + Args: + frame: The cancel frame signaling immediate cancellation. 
+ """ + await super().cancel(frame) + await self._client.disconnect() + if self._audio_in_task and self._params.audio_in_enabled: + await self.cancel_task(self._audio_in_task) + + async def setup(self, setup: FrameProcessorSetup): + """Setup the input transport with shared client setup. + + Args: + setup: The frame processor setup configuration. + """ + await super().setup(setup) + await self._client.setup(setup) + + async def cleanup(self): + """Cleanup input transport and shared resources.""" + await super().cleanup() + await self._transport.cleanup() + + async def push_app_message(self, message: Any, sender: str): + """Push an application message as an urgent transport frame. + + Args: + message: The message data to send. + sender: ID of the message sender. + """ + frame = LiveKitTransportMessageUrgentFrame(message=message, participant_id=sender) + await self.push_frame(frame) + + async def _audio_in_task_handler(self): + """Handle incoming audio frames from participants.""" + logger.info("Audio input task started") + audio_iterator = self._client.get_next_audio_frame() + async for audio_data in audio_iterator: + if audio_data: + audio_frame_event, participant_id = audio_data + pipecat_audio_frame = await self._convert_livekit_audio_to_pipecat( + audio_frame_event + ) + + # Skip frames with no audio data + if len(pipecat_audio_frame.audio) == 0: + continue + + input_audio_frame = UserAudioRawFrame( + user_id=participant_id, + audio=pipecat_audio_frame.audio, + sample_rate=pipecat_audio_frame.sample_rate, + num_channels=pipecat_audio_frame.num_channels, + ) + await self.push_audio_frame(input_audio_frame) + + async def _convert_livekit_audio_to_pipecat( + self, audio_frame_event: rtc.AudioFrameEvent + ) -> AudioRawFrame: + """Convert LiveKit audio frame to Pipecat audio frame.""" + audio_frame = audio_frame_event.frame + + audio_data = await self._resampler.resample( + audio_frame.data.tobytes(), audio_frame.sample_rate, self.sample_rate + ) + + return 
AudioRawFrame( + audio=audio_data, + sample_rate=self.sample_rate, + num_channels=audio_frame.num_channels, + ) + + +class LiveKitOutputTransport(BaseOutputTransport): + """Handles outgoing media streams and events to LiveKit rooms. + + Manages sending audio frames and data messages to LiveKit room participants, + including audio format conversion for LiveKit compatibility. + """ + + def __init__( + self, + transport: BaseTransport, + client: LiveKitTransportClient, + params: LiveKitParams, + **kwargs, + ): + """Initialize the LiveKit output transport. + + Args: + transport: The parent transport instance. + client: LiveKitTransportClient instance. + params: Configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + self._transport = transport + self._client = client + + # Whether we have seen a StartFrame already. + self._initialized = False + + async def start(self, frame: StartFrame): + """Start the output transport and connect to LiveKit room. + + Args: + frame: The start frame containing initialization parameters. + """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.start(frame) + await self._client.connect() + await self.set_transport_ready(frame) + logger.info("LiveKitOutputTransport started") + + async def stop(self, frame: EndFrame): + """Stop the output transport and disconnect from LiveKit room. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._client.disconnect() + logger.info("LiveKitOutputTransport stopped") + + async def cancel(self, frame: CancelFrame): + """Cancel the output transport and disconnect from LiveKit room. + + Args: + frame: The cancel frame signaling immediate cancellation. 
+ """ + await super().cancel(frame) + await self._client.disconnect() + + async def setup(self, setup: FrameProcessorSetup): + """Setup the output transport with shared client setup. + + Args: + setup: The frame processor setup configuration. + """ + await super().setup(setup) + await self._client.setup(setup) + + async def cleanup(self): + """Cleanup output transport and shared resources.""" + await super().cleanup() + await self._transport.cleanup() + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send a transport message to participants. + + Args: + frame: The transport message frame to send. + """ + if isinstance(frame, (LiveKitTransportMessageFrame, LiveKitTransportMessageUrgentFrame)): + await self._client.send_data(frame.message.encode(), frame.participant_id) + else: + await self._client.send_data(frame.message.encode()) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the LiveKit room. + + Args: + frame: The audio frame to write. + """ + livekit_audio = self._convert_pipecat_audio_to_livekit(frame.audio) + await self._client.publish_audio(livekit_audio) + + def _convert_pipecat_audio_to_livekit(self, pipecat_audio: bytes) -> rtc.AudioFrame: + """Convert Pipecat audio data to LiveKit audio frame.""" + bytes_per_sample = 2 # Assuming 16-bit audio + total_samples = len(pipecat_audio) // bytes_per_sample + samples_per_channel = total_samples // self._params.audio_out_channels + + return rtc.AudioFrame( + data=pipecat_audio, + sample_rate=self.sample_rate, + num_channels=self._params.audio_out_channels, + samples_per_channel=samples_per_channel, + ) + + +class LiveKitTransport(BaseTransport): + """Transport implementation for LiveKit real-time communication. + + Provides comprehensive LiveKit integration including audio streaming, data + messaging, participant management, and room event handling for conversational + AI applications. 
+ """ + + def __init__( + self, + url: str, + token: str, + room_name: str, + params: Optional[LiveKitParams] = None, + input_name: Optional[str] = None, + output_name: Optional[str] = None, + ): + """Initialize the LiveKit transport. + + Args: + url: LiveKit server URL to connect to. + token: Authentication token for the room. + room_name: Name of the LiveKit room to join. + params: Configuration parameters for the transport. + input_name: Optional name for the input transport. + output_name: Optional name for the output transport. + """ + super().__init__(input_name=input_name, output_name=output_name) + + callbacks = LiveKitCallbacks( + on_connected=self._on_connected, + on_disconnected=self._on_disconnected, + on_participant_connected=self._on_participant_connected, + on_participant_disconnected=self._on_participant_disconnected, + on_audio_track_subscribed=self._on_audio_track_subscribed, + on_audio_track_unsubscribed=self._on_audio_track_unsubscribed, + on_data_received=self._on_data_received, + on_first_participant_joined=self._on_first_participant_joined, + ) + self._params = params or LiveKitParams() + + self._client = LiveKitTransportClient( + url, token, room_name, self._params, callbacks, self.name + ) + self._input: Optional[LiveKitInputTransport] = None + self._output: Optional[LiveKitOutputTransport] = None + + self._register_event_handler("on_connected") + self._register_event_handler("on_disconnected") + self._register_event_handler("on_participant_connected") + self._register_event_handler("on_participant_disconnected") + self._register_event_handler("on_audio_track_subscribed") + self._register_event_handler("on_audio_track_unsubscribed") + self._register_event_handler("on_data_received") + self._register_event_handler("on_first_participant_joined") + self._register_event_handler("on_participant_left") + self._register_event_handler("on_call_state_updated") + + def input(self) -> LiveKitInputTransport: + """Get the input transport for receiving 
media and events. + + Returns: + The LiveKit input transport instance. + """ + if not self._input: + self._input = LiveKitInputTransport( + self, self._client, self._params, name=self._input_name + ) + return self._input + + def output(self) -> LiveKitOutputTransport: + """Get the output transport for sending media and events. + + Returns: + The LiveKit output transport instance. + """ + if not self._output: + self._output = LiveKitOutputTransport( + self, self._client, self._params, name=self._output_name + ) + return self._output + + @property + def participant_id(self) -> str: + """Get the participant ID for this transport. + + Returns: + The participant ID assigned by LiveKit. + """ + return self._client.participant_id + + async def send_audio(self, frame: OutputAudioRawFrame): + """Send an audio frame to the LiveKit room. + + Args: + frame: The audio frame to send. + """ + if self._output: + await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) + + def get_participants(self) -> List[str]: + """Get list of participant IDs in the room. + + Returns: + List of participant IDs. + """ + return self._client.get_participants() + + async def get_participant_metadata(self, participant_id: str) -> dict: + """Get metadata for a specific participant. + + Args: + participant_id: ID of the participant to get metadata for. + + Returns: + Dictionary containing participant metadata. + """ + return await self._client.get_participant_metadata(participant_id) + + async def set_metadata(self, metadata: str): + """Set metadata for the local participant. + + Args: + metadata: Metadata string to set. + """ + await self._client.set_participant_metadata(metadata) + + async def mute_participant(self, participant_id: str): + """Mute a specific participant's audio tracks. + + Args: + participant_id: ID of the participant to mute. 
+ """ + await self._client.mute_participant(participant_id) + + async def unmute_participant(self, participant_id: str): + """Unmute a specific participant's audio tracks. + + Args: + participant_id: ID of the participant to unmute. + """ + await self._client.unmute_participant(participant_id) + + async def _on_connected(self): + """Handle room connected events.""" + await self._call_event_handler("on_connected") + + async def _on_disconnected(self): + """Handle room disconnected events.""" + await self._call_event_handler("on_disconnected") + + async def _on_participant_connected(self, participant_id: str): + """Handle participant connected events.""" + await self._call_event_handler("on_participant_connected", participant_id) + + async def _on_participant_disconnected(self, participant_id: str): + """Handle participant disconnected events.""" + await self._call_event_handler("on_participant_disconnected", participant_id) + await self._call_event_handler("on_participant_left", participant_id, "disconnected") + + async def _on_audio_track_subscribed(self, participant_id: str): + """Handle audio track subscribed events.""" + await self._call_event_handler("on_audio_track_subscribed", participant_id) + participant = self._client.room.remote_participants.get(participant_id) + if participant: + for publication in participant.audio_tracks.values(): + self._client._on_track_subscribed_wrapper( + publication.track, publication, participant + ) + + async def _on_audio_track_unsubscribed(self, participant_id: str): + """Handle audio track unsubscribed events.""" + await self._call_event_handler("on_audio_track_unsubscribed", participant_id) + + async def _on_data_received(self, data: bytes, participant_id: str): + """Handle data received events.""" + if self._input: + await self._input.push_app_message(data.decode(), participant_id) + await self._call_event_handler("on_data_received", data, participant_id) + + async def send_message(self, message: str, participant_id: 
Optional[str] = None): + """Send a message to participants in the room. + + Args: + message: The message string to send. + participant_id: Optional specific participant to send to. + """ + if self._output: + frame = LiveKitTransportMessageFrame(message=message, participant_id=participant_id) + await self._output.send_message(frame) + + async def send_message_urgent(self, message: str, participant_id: Optional[str] = None): + """Send an urgent message to participants in the room. + + Args: + message: The urgent message string to send. + participant_id: Optional specific participant to send to. + """ + if self._output: + frame = LiveKitTransportMessageUrgentFrame( + message=message, participant_id=participant_id + ) + await self._output.send_message(frame) + + async def on_room_event(self, event): + """Handle room events. + + Args: + event: The room event to handle. + """ + # Handle room events + pass + + async def on_participant_event(self, event): + """Handle participant events. + + Args: + event: The participant event to handle. + """ + # Handle participant events + pass + + async def on_track_event(self, event): + """Handle track events. + + Args: + event: The track event to handle. + """ + # Handle track events + pass + + async def _on_call_state_updated(self, state: str): + """Handle call state update events.""" + await self._call_event_handler("on_call_state_updated", self, state) + + async def _on_first_participant_joined(self, participant_id: str): + """Handle first participant joined events.""" + await self._call_event_handler("on_first_participant_joined", participant_id) diff --git a/src/pipecat/transports/network/fastapi_websocket.py b/src/pipecat/transports/network/fastapi_websocket.py index 8287783c2..59cb07572 100644 --- a/src/pipecat/transports/network/fastapi_websocket.py +++ b/src/pipecat/transports/network/fastapi_websocket.py @@ -11,537 +11,15 @@ using FastAPI and WebSocket connections. 
Supports binary and text serialization with configurable session timeouts and WAV header generation. """ -import asyncio -import io -import time -import typing -import wave -from typing import Awaitable, Callable, Optional +import warnings -from loguru import logger -from pydantic import BaseModel +from pipecat.transports.websocket.fastapi import * -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - Frame, - InputAudioRawFrame, - OutputAudioRawFrame, - StartFrame, - StartInterruptionFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams - -try: - from fastapi import WebSocket - from starlette.websockets import WebSocketState -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error( - "In order to use FastAPI websockets, you need to `pip install pipecat-ai[websocket]`." +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.network.fastapi_websocket` is deprecated, " + "use `pipecat.transports.websocket.fastapi` instead.", + DeprecationWarning, + stacklevel=2, ) - raise Exception(f"Missing module: {e}") - - -class FastAPIWebsocketParams(TransportParams): - """Configuration parameters for FastAPI WebSocket transport. - - Parameters: - add_wav_header: Whether to add WAV headers to audio frames. - serializer: Frame serializer for encoding/decoding messages. - session_timeout: Session timeout in seconds, None for no timeout. 
- """ - - add_wav_header: bool = False - serializer: Optional[FrameSerializer] = None - session_timeout: Optional[int] = None - - -class FastAPIWebsocketCallbacks(BaseModel): - """Callback functions for WebSocket events. - - Parameters: - on_client_connected: Called when a client connects to the WebSocket. - on_client_disconnected: Called when a client disconnects from the WebSocket. - on_session_timeout: Called when a session timeout occurs. - """ - - on_client_connected: Callable[[WebSocket], Awaitable[None]] - on_client_disconnected: Callable[[WebSocket], Awaitable[None]] - on_session_timeout: Callable[[WebSocket], Awaitable[None]] - - -class FastAPIWebsocketClient: - """WebSocket client wrapper for handling connections and message passing. - - Manages WebSocket state, message sending/receiving, and connection lifecycle - with support for both binary and text message types. - """ - - def __init__(self, websocket: WebSocket, is_binary: bool, callbacks: FastAPIWebsocketCallbacks): - """Initialize the WebSocket client. - - Args: - websocket: The FastAPI WebSocket connection. - is_binary: Whether to use binary message format. - callbacks: Event callback functions. - """ - self._websocket = websocket - self._closing = False - self._is_binary = is_binary - self._callbacks = callbacks - self._leave_counter = 0 - - async def setup(self, _: StartFrame): - """Set up the WebSocket client. - - Args: - _: The start frame (unused). - """ - self._leave_counter += 1 - - def receive(self) -> typing.AsyncIterator[bytes | str]: - """Get an async iterator for receiving WebSocket messages. - - Returns: - An async iterator yielding bytes or strings based on message type. - """ - return self._websocket.iter_bytes() if self._is_binary else self._websocket.iter_text() - - async def send(self, data: str | bytes): - """Send data through the WebSocket connection. - - Args: - data: The data to send (string or bytes). 
- """ - try: - if self._can_send(): - if self._is_binary: - await self._websocket.send_bytes(data) - else: - await self._websocket.send_text(data) - except Exception as e: - logger.error( - f"{self} exception sending data: {e.__class__.__name__} ({e}), application_state: {self._websocket.application_state}" - ) - # For some reason the websocket is disconnected, and we are not able to send data - # So let's properly handle it and disconnect the transport if it is not already disconnecting - if ( - self._websocket.application_state == WebSocketState.DISCONNECTED - and not self.is_closing - ): - logger.warning("Closing already disconnected websocket!") - self._closing = True - await self.trigger_client_disconnected() - - async def disconnect(self): - """Disconnect the WebSocket client.""" - self._leave_counter -= 1 - if self._leave_counter > 0: - return - - if self.is_connected and not self.is_closing: - self._closing = True - try: - await self._websocket.close() - except Exception as e: - logger.error(f"{self} exception while closing the websocket: {e}") - finally: - await self.trigger_client_disconnected() - - async def trigger_client_disconnected(self): - """Trigger the client disconnected callback.""" - await self._callbacks.on_client_disconnected(self._websocket) - - async def trigger_client_connected(self): - """Trigger the client connected callback.""" - await self._callbacks.on_client_connected(self._websocket) - - async def trigger_client_timeout(self): - """Trigger the client timeout callback.""" - await self._callbacks.on_session_timeout(self._websocket) - - def _can_send(self): - """Check if data can be sent through the WebSocket.""" - return self.is_connected and not self.is_closing - - @property - def is_connected(self) -> bool: - """Check if the WebSocket is currently connected. - - Returns: - True if the WebSocket is in connected state. 
- """ - return self._websocket.client_state == WebSocketState.CONNECTED - - @property - def is_closing(self) -> bool: - """Check if the WebSocket is currently closing. - - Returns: - True if the WebSocket is in the process of closing. - """ - return self._closing - - -class FastAPIWebsocketInputTransport(BaseInputTransport): - """Input transport for FastAPI WebSocket connections. - - Handles incoming WebSocket messages, deserializes frames, and manages - connection monitoring with optional session timeouts. - """ - - def __init__( - self, - transport: BaseTransport, - client: FastAPIWebsocketClient, - params: FastAPIWebsocketParams, - **kwargs, - ): - """Initialize the WebSocket input transport. - - Args: - transport: The parent transport instance. - client: The WebSocket client wrapper. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._transport = transport - self._client = client - self._params = params - self._receive_task = None - self._monitor_websocket_task = None - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def start(self, frame: StartFrame): - """Start the input transport and begin message processing. - - Args: - frame: The start frame containing initialization parameters. 
- """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.setup(frame) - if self._params.serializer: - await self._params.serializer.setup(frame) - if not self._monitor_websocket_task and self._params.session_timeout: - self._monitor_websocket_task = self.create_task(self._monitor_websocket()) - await self._client.trigger_client_connected() - if not self._receive_task: - self._receive_task = self.create_task(self._receive_messages()) - await self.set_transport_ready(frame) - - async def _stop_tasks(self): - """Stop all running tasks.""" - if self._monitor_websocket_task: - await self.cancel_task(self._monitor_websocket_task) - self._monitor_websocket_task = None - if self._receive_task: - await self.cancel_task(self._receive_task) - self._receive_task = None - - async def stop(self, frame: EndFrame): - """Stop the input transport and cleanup resources. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._stop_tasks() - await self._client.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport and stop all processing. - - Args: - frame: The cancel frame signaling immediate cancellation. 
- """ - await super().cancel(frame) - await self._stop_tasks() - await self._client.disconnect() - - async def cleanup(self): - """Clean up transport resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def _receive_messages(self): - """Main message receiving loop for WebSocket messages.""" - try: - async for message in self._client.receive(): - if not self._params.serializer: - continue - - frame = await self._params.serializer.deserialize(message) - - if not frame: - continue - - if isinstance(frame, InputAudioRawFrame): - await self.push_audio_frame(frame) - else: - await self.push_frame(frame) - except Exception as e: - logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") - - await self._client.trigger_client_disconnected() - - async def _monitor_websocket(self): - """Wait for self._params.session_timeout seconds, if the websocket is still open, trigger timeout event.""" - await asyncio.sleep(self._params.session_timeout) - await self._client.trigger_client_timeout() - - -class FastAPIWebsocketOutputTransport(BaseOutputTransport): - """Output transport for FastAPI WebSocket connections. - - Handles outgoing frame serialization, audio streaming with timing simulation, - and WebSocket message transmission with optional WAV header generation. - """ - - def __init__( - self, - transport: BaseTransport, - client: FastAPIWebsocketClient, - params: FastAPIWebsocketParams, - **kwargs, - ): - """Initialize the WebSocket output transport. - - Args: - transport: The parent transport instance. - client: The WebSocket client wrapper. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - - self._transport = transport - self._client = client - self._params = params - - # write_audio_frame() is called quickly, as soon as we get audio - # (e.g. 
from the TTS), and since this is just a network connection we - # would be sending it to quickly. Instead, we want to block to emulate - # an audio device, this is what the send interval is. It will be - # computed on StartFrame. - self._send_interval = 0 - self._next_send_time = 0 - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def start(self, frame: StartFrame): - """Start the output transport and initialize timing. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.setup(frame) - if self._params.serializer: - await self._params.serializer.setup(frame) - self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport and cleanup resources. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._write_frame(frame) - await self._client.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and stop all processing. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._write_frame(frame) - await self._client.disconnect() - - async def cleanup(self): - """Clean up transport resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process outgoing frames with special handling for interruptions. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. 
- """ - await super().process_frame(frame, direction) - - if isinstance(frame, StartInterruptionFrame): - await self._write_frame(frame) - self._next_send_time = 0 - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message frame. - - Args: - frame: The transport message frame to send. - """ - await self._write_frame(frame) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the WebSocket with timing simulation. - - Args: - frame: The output audio frame to write. - """ - if self._client.is_closing or not self._client.is_connected: - return - - frame = OutputAudioRawFrame( - audio=frame.audio, - sample_rate=self.sample_rate, - num_channels=self._params.audio_out_channels, - ) - - if self._params.add_wav_header: - with io.BytesIO() as buffer: - with wave.open(buffer, "wb") as wf: - wf.setsampwidth(2) - wf.setnchannels(frame.num_channels) - wf.setframerate(frame.sample_rate) - wf.writeframes(frame.audio) - wav_frame = OutputAudioRawFrame( - buffer.getvalue(), - sample_rate=frame.sample_rate, - num_channels=frame.num_channels, - ) - frame = wav_frame - - await self._write_frame(frame) - - # Simulate audio playback with a sleep. - await self._write_audio_sleep() - - async def _write_frame(self, frame: Frame): - """Serialize and send a frame through the WebSocket.""" - if not self._params.serializer: - return - - try: - payload = await self._params.serializer.serialize(frame) - if payload: - await self._client.send(payload) - except Exception as e: - logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})") - - async def _write_audio_sleep(self): - """Simulate audio playback timing with appropriate delays.""" - # Simulate a clock. 
- current_time = time.monotonic() - sleep_duration = max(0, self._next_send_time - current_time) - await asyncio.sleep(sleep_duration) - if sleep_duration == 0: - self._next_send_time = time.monotonic() + self._send_interval - else: - self._next_send_time += self._send_interval - - -class FastAPIWebsocketTransport(BaseTransport): - """FastAPI WebSocket transport for real-time audio/video streaming. - - Provides bidirectional WebSocket communication with frame serialization, - session management, and event handling for client connections and timeouts. - """ - - def __init__( - self, - websocket: WebSocket, - params: FastAPIWebsocketParams, - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the FastAPI WebSocket transport. - - Args: - websocket: The FastAPI WebSocket connection. - params: Transport configuration parameters. - input_name: Optional name for the input processor. - output_name: Optional name for the output processor. - """ - super().__init__(input_name=input_name, output_name=output_name) - - self._params = params - - self._callbacks = FastAPIWebsocketCallbacks( - on_client_connected=self._on_client_connected, - on_client_disconnected=self._on_client_disconnected, - on_session_timeout=self._on_session_timeout, - ) - - is_binary = False - if self._params.serializer: - is_binary = self._params.serializer.type == FrameSerializerType.BINARY - self._client = FastAPIWebsocketClient(websocket, is_binary, self._callbacks) - - self._input = FastAPIWebsocketInputTransport( - self, self._client, self._params, name=self._input_name - ) - self._output = FastAPIWebsocketOutputTransport( - self, self._client, self._params, name=self._output_name - ) - - # Register supported handlers. The user will only be able to register - # these handlers. 
- self._register_event_handler("on_client_connected") - self._register_event_handler("on_client_disconnected") - self._register_event_handler("on_session_timeout") - - def input(self) -> FastAPIWebsocketInputTransport: - """Get the input transport processor. - - Returns: - The WebSocket input transport instance. - """ - return self._input - - def output(self) -> FastAPIWebsocketOutputTransport: - """Get the output transport processor. - - Returns: - The WebSocket output transport instance. - """ - return self._output - - async def _on_client_connected(self, websocket): - """Handle client connected event.""" - await self._call_event_handler("on_client_connected", websocket) - - async def _on_client_disconnected(self, websocket): - """Handle client disconnected event.""" - await self._call_event_handler("on_client_disconnected", websocket) - - async def _on_session_timeout(self, websocket): - """Handle session timeout event.""" - await self._call_event_handler("on_session_timeout", websocket) diff --git a/src/pipecat/transports/network/small_webrtc.py b/src/pipecat/transports/network/small_webrtc.py index 348fa28d0..f928e81de 100644 --- a/src/pipecat/transports/network/small_webrtc.py +++ b/src/pipecat/transports/network/small_webrtc.py @@ -11,925 +11,15 @@ real-time audio and video communication. It supports bidirectional media streaming, application messaging, and client connection management. 
""" -import asyncio -import fractions -import time -from collections import deque -from typing import Any, Awaitable, Callable, Optional - -import numpy as np -from loguru import logger -from pydantic import BaseModel - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - Frame, - InputAudioRawFrame, - OutputAudioRawFrame, - OutputImageRawFrame, - SpriteFrame, - StartFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, - UserImageRawFrame, - UserImageRequestFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection - -try: - import cv2 - from aiortc import VideoStreamTrack - from aiortc.mediastreams import AudioStreamTrack, MediaStreamError - from av import AudioFrame, AudioResampler, VideoFrame -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use the SmallWebRTC, you need to `pip install pipecat-ai[webrtc]`.") - raise Exception(f"Missing module: {e}") - -CAM_VIDEO_SOURCE = "camera" -SCREEN_VIDEO_SOURCE = "screenVideo" -MIC_AUDIO_SOURCE = "microphone" - - -class SmallWebRTCCallbacks(BaseModel): - """Callback handlers for SmallWebRTC events. - - Parameters: - on_app_message: Called when an application message is received. - on_client_connected: Called when a client establishes connection. - on_client_disconnected: Called when a client disconnects. - """ - - on_app_message: Callable[[Any], Awaitable[None]] - on_client_connected: Callable[[SmallWebRTCConnection], Awaitable[None]] - on_client_disconnected: Callable[[SmallWebRTCConnection], Awaitable[None]] - - -class RawAudioTrack(AudioStreamTrack): - """Custom audio stream track for WebRTC output. 
- - Handles audio frame generation and timing for WebRTC transmission, - supporting queued audio data with proper synchronization. - """ - - def __init__(self, sample_rate): - """Initialize the raw audio track. - - Args: - sample_rate: The audio sample rate in Hz. - """ - super().__init__() - self._sample_rate = sample_rate - self._samples_per_10ms = sample_rate * 10 // 1000 - self._bytes_per_10ms = self._samples_per_10ms * 2 # 16-bit (2 bytes per sample) - self._timestamp = 0 - self._start = time.time() - # Queue of (bytes, future), broken into 10ms sub chunks as needed - self._chunk_queue = deque() - - def add_audio_bytes(self, audio_bytes: bytes): - """Add audio bytes to the buffer for transmission. - - Args: - audio_bytes: Raw audio data to queue for transmission. - - Returns: - A Future that completes when the data is processed. - - Raises: - ValueError: If audio bytes are not a multiple of 10ms size. - """ - if len(audio_bytes) % self._bytes_per_10ms != 0: - raise ValueError("Audio bytes must be a multiple of 10ms size.") - future = asyncio.get_running_loop().create_future() - - # Break input into 10ms chunks - for i in range(0, len(audio_bytes), self._bytes_per_10ms): - chunk = audio_bytes[i : i + self._bytes_per_10ms] - # Only the last chunk carries the future to be resolved once fully consumed - fut = future if i + self._bytes_per_10ms >= len(audio_bytes) else None - self._chunk_queue.append((chunk, fut)) - - return future - - async def recv(self): - """Return the next audio frame for WebRTC transmission. - - Returns: - An AudioFrame containing the next audio data or silence. 
- """ - # Compute required wait time for synchronization - if self._timestamp > 0: - wait = self._start + (self._timestamp / self._sample_rate) - time.time() - if wait > 0: - await asyncio.sleep(wait) - - if self._chunk_queue: - chunk, future = self._chunk_queue.popleft() - if future and not future.done(): - future.set_result(True) - else: - chunk = bytes(self._bytes_per_10ms) # silence - - # Convert the byte data to an ndarray of int16 samples - samples = np.frombuffer(chunk, dtype=np.int16) - - # Create AudioFrame - frame = AudioFrame.from_ndarray(samples[None, :], layout="mono") - frame.sample_rate = self._sample_rate - frame.pts = self._timestamp - frame.time_base = fractions.Fraction(1, self._sample_rate) - self._timestamp += self._samples_per_10ms - return frame - - -class RawVideoTrack(VideoStreamTrack): - """Custom video stream track for WebRTC output. - - Handles video frame queuing and conversion for WebRTC transmission. - """ - - def __init__(self, width, height): - """Initialize the raw video track. - - Args: - width: Video frame width in pixels. - height: Video frame height in pixels. - """ - super().__init__() - self._width = width - self._height = height - self._video_buffer = asyncio.Queue() - - def add_video_frame(self, frame): - """Add a video frame to the transmission buffer. - - Args: - frame: The video frame to queue for transmission. - """ - self._video_buffer.put_nowait(frame) - - async def recv(self): - """Return the next video frame for WebRTC transmission. - - Returns: - A VideoFrame ready for WebRTC transmission. 
- """ - raw_frame = await self._video_buffer.get() - - # Convert bytes to NumPy array - frame_data = np.frombuffer(raw_frame.image, dtype=np.uint8).reshape( - (self._height, self._width, 3) - ) - - frame = VideoFrame.from_ndarray(frame_data, format="rgb24") - - # Assign timestamp - frame.pts, frame.time_base = await self.next_timestamp() - - return frame - - -class SmallWebRTCClient: - """WebRTC client implementation for handling connections and media streams. - - Manages WebRTC peer connections, audio/video streaming, and application - messaging through the SmallWebRTCConnection interface. - """ - - FORMAT_CONVERSIONS = { - "yuv420p": cv2.COLOR_YUV2RGB_I420, - "yuvj420p": cv2.COLOR_YUV2RGB_I420, # OpenCV treats both the same - "nv12": cv2.COLOR_YUV2RGB_NV12, - "gray": cv2.COLOR_GRAY2RGB, - } - - def __init__(self, webrtc_connection: SmallWebRTCConnection, callbacks: SmallWebRTCCallbacks): - """Initialize the WebRTC client. - - Args: - webrtc_connection: The underlying WebRTC connection handler. - callbacks: Event callbacks for connection and message handling. - """ - self._webrtc_connection = webrtc_connection - self._closing = False - self._callbacks = callbacks - - self._audio_output_track = None - self._video_output_track = None - self._audio_input_track: Optional[AudioStreamTrack] = None - self._video_input_track: Optional[VideoStreamTrack] = None - self._screen_video_track: Optional[VideoStreamTrack] = None - - self._params = None - self._audio_in_channels = None - self._in_sample_rate = None - self._out_sample_rate = None - self._leave_counter = 0 - - # We are always resampling it for 16000 if the sample_rate that we receive is bigger than that. 
- # otherwise we face issues with Silero VAD - self._pipecat_resampler = AudioResampler("s16", "mono", 16000) - - @self._webrtc_connection.event_handler("connected") - async def on_connected(connection: SmallWebRTCConnection): - logger.debug("Peer connection established.") - await self._handle_client_connected() - - @self._webrtc_connection.event_handler("disconnected") - async def on_disconnected(connection: SmallWebRTCConnection): - logger.debug("Peer connection lost.") - await self._handle_peer_disconnected() - - @self._webrtc_connection.event_handler("closed") - async def on_closed(connection: SmallWebRTCConnection): - logger.debug("Client connection closed.") - await self._handle_client_closed() - - @self._webrtc_connection.event_handler("app-message") - async def on_app_message(connection: SmallWebRTCConnection, message: Any): - await self._handle_app_message(message) - - def _convert_frame(self, frame_array: np.ndarray, format_name: str) -> np.ndarray: - """Convert a video frame to RGB format based on the input format. - - Args: - frame_array: The input frame as a NumPy array. - format_name: The format of the input frame. - - Returns: - The converted RGB frame as a NumPy array. - - Raises: - ValueError: If the format is unsupported. - """ - if format_name.startswith("rgb"): # Already in RGB, no conversion needed - return frame_array - - conversion_code = SmallWebRTCClient.FORMAT_CONVERSIONS.get(format_name) - - if conversion_code is None: - raise ValueError(f"Unsupported format: {format_name}") - - return cv2.cvtColor(frame_array, conversion_code) - - async def read_video_frame(self, video_source: str): - """Read video frames from the WebRTC connection. - - Reads a video frame from the given MediaStreamTrack, converts it to RGB, - and creates an InputImageRawFrame. - - Args: - video_source: Video source to capture ("camera" or "screenVideo"). - - Yields: - UserImageRawFrame objects containing video data from the peer. 
- """ - while True: - video_track = ( - self._video_input_track - if video_source == CAM_VIDEO_SOURCE - else self._screen_video_track - ) - if video_track is None: - await asyncio.sleep(0.01) - continue - - try: - frame = await asyncio.wait_for(video_track.recv(), timeout=2.0) - except asyncio.TimeoutError: - if self._webrtc_connection.is_connected(): - logger.warning("Timeout: No video frame received within the specified time.") - # self._webrtc_connection.ask_to_renegotiate() - frame = None - except MediaStreamError: - logger.warning("Received an unexpected media stream error while reading the audio.") - frame = None - - if frame is None or not isinstance(frame, VideoFrame): - # If no valid frame, sleep for a bit - await asyncio.sleep(0.01) - continue - - format_name = frame.format.name - # Convert frame to NumPy array in its native format - frame_array = frame.to_ndarray(format=format_name) - frame_rgb = self._convert_frame(frame_array, format_name) - - image_frame = UserImageRawFrame( - user_id=self._webrtc_connection.pc_id, - image=frame_rgb.tobytes(), - size=(frame.width, frame.height), - format="RGB", - ) - image_frame.transport_source = video_source - - yield image_frame - - async def read_audio_frame(self): - """Read audio frames from the WebRTC connection. - - Reads 20ms of audio from the given MediaStreamTrack and creates an InputAudioRawFrame. - - Yields: - InputAudioRawFrame objects containing audio data from the peer. 
- """ - while True: - if self._audio_input_track is None: - await asyncio.sleep(0.01) - continue - - try: - frame = await asyncio.wait_for(self._audio_input_track.recv(), timeout=2.0) - except asyncio.TimeoutError: - if self._webrtc_connection.is_connected(): - logger.warning("Timeout: No audio frame received within the specified time.") - frame = None - except MediaStreamError: - logger.warning("Received an unexpected media stream error while reading the audio.") - frame = None - - if frame is None or not isinstance(frame, AudioFrame): - # If we don't read any audio let's sleep for a little bit (i.e. busy wait). - await asyncio.sleep(0.01) - continue - - if frame.sample_rate > self._in_sample_rate: - resampled_frames = self._pipecat_resampler.resample(frame) - for resampled_frame in resampled_frames: - # 16-bit PCM bytes - pcm_bytes = resampled_frame.to_ndarray().astype(np.int16).tobytes() - audio_frame = InputAudioRawFrame( - audio=pcm_bytes, - sample_rate=resampled_frame.sample_rate, - num_channels=self._audio_in_channels, - ) - yield audio_frame - else: - # 16-bit PCM bytes - pcm_bytes = frame.to_ndarray().astype(np.int16).tobytes() - audio_frame = InputAudioRawFrame( - audio=pcm_bytes, - sample_rate=frame.sample_rate, - num_channels=self._audio_in_channels, - ) - yield audio_frame - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the WebRTC connection. - - Args: - frame: The audio frame to transmit. - """ - if self._can_send() and self._audio_output_track: - await self._audio_output_track.add_audio_bytes(frame.audio) - - async def write_video_frame(self, frame: OutputImageRawFrame): - """Write a video frame to the WebRTC connection. - - Args: - frame: The video frame to transmit. - """ - if self._can_send() and self._video_output_track: - self._video_output_track.add_video_frame(frame) - - async def setup(self, _params: TransportParams, frame): - """Set up the client with transport parameters. 
- - Args: - _params: Transport configuration parameters. - frame: The initialization frame containing setup data. - """ - self._audio_in_channels = _params.audio_in_channels - self._in_sample_rate = _params.audio_in_sample_rate or frame.audio_in_sample_rate - self._out_sample_rate = _params.audio_out_sample_rate or frame.audio_out_sample_rate - self._params = _params - self._leave_counter += 1 - - async def connect(self): - """Establish the WebRTC connection.""" - if self._webrtc_connection.is_connected(): - # already initialized - return - - logger.info(f"Connecting to Small WebRTC") - await self._webrtc_connection.connect() - - async def disconnect(self): - """Disconnect from the WebRTC peer.""" - self._leave_counter -= 1 - if self._leave_counter > 0: - return - - if self.is_connected and not self.is_closing: - logger.info(f"Disconnecting to Small WebRTC") - self._closing = True - await self._webrtc_connection.disconnect() - await self._handle_peer_disconnected() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send an application message through the WebRTC connection. - - Args: - frame: The message frame to send. 
- """ - if self._can_send(): - self._webrtc_connection.send_app_message(frame.message) - - async def _handle_client_connected(self): - """Handle client connection establishment.""" - # There is nothing to do here yet, the pipeline is still not ready - if not self._params: - return - - self._audio_input_track = self._webrtc_connection.audio_input_track() - self._video_input_track = self._webrtc_connection.video_input_track() - self._screen_video_track = self._webrtc_connection.screen_video_input_track() - if self._params.audio_out_enabled: - self._audio_output_track = RawAudioTrack(sample_rate=self._out_sample_rate) - self._webrtc_connection.replace_audio_track(self._audio_output_track) - - if self._params.video_out_enabled: - self._video_output_track = RawVideoTrack( - width=self._params.video_out_width, height=self._params.video_out_height - ) - self._webrtc_connection.replace_video_track(self._video_output_track) - - await self._callbacks.on_client_connected(self._webrtc_connection) - - async def _handle_peer_disconnected(self): - """Handle peer disconnection cleanup.""" - self._audio_input_track = None - self._video_input_track = None - self._screen_video_track = None - self._audio_output_track = None - self._video_output_track = None - - async def _handle_client_closed(self): - """Handle client connection closure.""" - self._audio_input_track = None - self._video_input_track = None - self._screen_video_track = None - self._audio_output_track = None - self._video_output_track = None - await self._callbacks.on_client_disconnected(self._webrtc_connection) - - async def _handle_app_message(self, message: Any): - """Handle incoming application messages.""" - await self._callbacks.on_app_message(message) - - def _can_send(self): - """Check if the connection is ready for sending data.""" - return self.is_connected and not self.is_closing - - @property - def is_connected(self) -> bool: - """Check if the WebRTC connection is established. 
- - Returns: - True if connected to the peer. - """ - return self._webrtc_connection.is_connected() - - @property - def is_closing(self) -> bool: - """Check if the connection is in the process of closing. - - Returns: - True if the connection is closing. - """ - return self._closing - - -class SmallWebRTCInputTransport(BaseInputTransport): - """Input transport implementation for SmallWebRTC. - - Handles incoming audio and video streams from WebRTC peers, - including user image requests and application message handling. - """ - - def __init__( - self, - client: SmallWebRTCClient, - params: TransportParams, - **kwargs, - ): - """Initialize the WebRTC input transport. - - Args: - client: The WebRTC client instance. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._client = client - self._params = params - self._receive_audio_task = None - self._receive_video_task = None - self._receive_screen_video_task = None - self._image_requests = {} - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process incoming frames including user image requests. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, UserImageRequestFrame): - await self.request_participant_image(frame) - - async def start(self, frame: StartFrame): - """Start the input transport and establish WebRTC connection. - - Args: - frame: The start frame containing initialization parameters. 
- """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.setup(self._params, frame) - await self._client.connect() - await self.set_transport_ready(frame) - if not self._receive_audio_task and self._params.audio_in_enabled: - self._receive_audio_task = self.create_task(self._receive_audio()) - if not self._receive_video_task and self._params.video_in_enabled: - self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) - - async def _stop_tasks(self): - """Stop all background tasks.""" - if self._receive_audio_task: - await self.cancel_task(self._receive_audio_task) - self._receive_audio_task = None - if self._receive_video_task: - await self.cancel_task(self._receive_video_task) - self._receive_video_task = None - - async def stop(self, frame: EndFrame): - """Stop the input transport and disconnect from WebRTC. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._stop_tasks() - await self._client.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport and disconnect immediately. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._stop_tasks() - await self._client.disconnect() - - async def _receive_audio(self): - """Background task for receiving audio frames from WebRTC.""" - try: - audio_iterator = self._client.read_audio_frame() - async for audio_frame in audio_iterator: - if audio_frame: - await self.push_audio_frame(audio_frame) - - except Exception as e: - logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") - - async def _receive_video(self, video_source: str): - """Background task for receiving video frames from WebRTC. - - Args: - video_source: Video source to capture ("camera" or "screenVideo"). 
- """ - try: - video_iterator = self._client.read_video_frame(video_source) - async for video_frame in video_iterator: - if video_frame: - await self.push_video_frame(video_frame) - - # Check if there are any pending image requests and create UserImageRawFrame - if self._image_requests: - for req_id, request_frame in list(self._image_requests.items()): - if request_frame.video_source == video_source: - # Create UserImageRawFrame using the current video frame - image_frame = UserImageRawFrame( - user_id=request_frame.user_id, - request=request_frame, - image=video_frame.image, - size=video_frame.size, - format=video_frame.format, - ) - image_frame.transport_source = video_source - # Push the frame to the pipeline - await self.push_video_frame(image_frame) - # Remove from pending requests - del self._image_requests[req_id] - - except Exception as e: - logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") - - async def push_app_message(self, message: Any): - """Push an application message into the pipeline. - - Args: - message: The application message to process. - """ - logger.debug(f"Received app message inside SmallWebRTCInputTransport {message}") - frame = TransportMessageUrgentFrame(message=message) - await self.push_frame(frame) - - # Add this method similar to DailyInputTransport.request_participant_image - async def request_participant_image(self, frame: UserImageRequestFrame): - """Request an image frame from the participant's video stream. - - When a UserImageRequestFrame is received, this method will store the request - and the next video frame received will be converted to a UserImageRawFrame. - - Args: - frame: The user image request frame. 
- """ - logger.debug(f"Requesting image from participant: {frame.user_id}") - - # Store the request - request_id = f"{frame.function_name}:{frame.tool_call_id}" - self._image_requests[request_id] = frame - - # Default to camera if no source specified - if frame.video_source is None: - frame.video_source = CAM_VIDEO_SOURCE - # If we're not already receiving video, try to get a frame now - if ( - frame.video_source == CAM_VIDEO_SOURCE - and not self._receive_video_task - and self._params.video_in_enabled - ): - # Start video reception if it's not already running - self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) - elif ( - frame.video_source == SCREEN_VIDEO_SOURCE - and not self._receive_screen_video_task - and self._params.video_in_enabled - ): - # Start screen video reception if it's not already running - self._receive_screen_video_task = self.create_task( - self._receive_video(SCREEN_VIDEO_SOURCE) - ) - - async def capture_participant_media( - self, - source: str = CAM_VIDEO_SOURCE, - ): - """Capture media from a specific participant. - - Args: - source: Media source to capture from. 
("camera", "microphone", or "screenVideo") - """ - # If we're not already receiving video, try to get a frame now - if ( - source == MIC_AUDIO_SOURCE - and not self._receive_audio_task - and self._params.audio_in_enabled - ): - # Start audio reception if it's not already running - self._receive_audio_task = self.create_task(self._receive_audio()) - elif ( - source == CAM_VIDEO_SOURCE - and not self._receive_video_task - and self._params.video_in_enabled - ): - # Start video reception if it's not already running - self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) - elif ( - source == SCREEN_VIDEO_SOURCE - and not self._receive_screen_video_task - and self._params.video_in_enabled - ): - # Start screen video reception if it's not already running - self._receive_screen_video_task = self.create_task( - self._receive_video(SCREEN_VIDEO_SOURCE) - ) - - -class SmallWebRTCOutputTransport(BaseOutputTransport): - """Output transport implementation for SmallWebRTC. - - Handles outgoing audio and video streams to WebRTC peers, - including transport message sending. - """ - - def __init__( - self, - client: SmallWebRTCClient, - params: TransportParams, - **kwargs, - ): - """Initialize the WebRTC output transport. - - Args: - client: The WebRTC client instance. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._client = client - self._params = params - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def start(self, frame: StartFrame): - """Start the output transport and establish WebRTC connection. - - Args: - frame: The start frame containing initialization parameters. 
- """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.setup(self._params, frame) - await self._client.connect() - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport and disconnect from WebRTC. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._client.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and disconnect immediately. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._client.disconnect() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message through the WebRTC connection. - - Args: - frame: The transport message frame to send. - """ - await self._client.send_message(frame) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the WebRTC connection. - - Args: - frame: The output audio frame to transmit. - """ - await self._client.write_audio_frame(frame) - - async def write_video_frame(self, frame: OutputImageRawFrame): - """Write a video frame to the WebRTC connection. - - Args: - frame: The output video frame to transmit. - """ - await self._client.write_video_frame(frame) - - -class SmallWebRTCTransport(BaseTransport): - """WebRTC transport implementation for real-time communication. - - Provides bidirectional audio and video streaming over WebRTC connections - with support for application messaging and connection event handling. - """ - - def __init__( - self, - webrtc_connection: SmallWebRTCConnection, - params: TransportParams, - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the WebRTC transport. - - Args: - webrtc_connection: The underlying WebRTC connection handler. 
- params: Transport configuration parameters. - input_name: Optional name for the input processor. - output_name: Optional name for the output processor. - """ - super().__init__(input_name=input_name, output_name=output_name) - self._params = params - - self._callbacks = SmallWebRTCCallbacks( - on_app_message=self._on_app_message, - on_client_connected=self._on_client_connected, - on_client_disconnected=self._on_client_disconnected, - ) - - self._client = SmallWebRTCClient(webrtc_connection, self._callbacks) - - self._input: Optional[SmallWebRTCInputTransport] = None - self._output: Optional[SmallWebRTCOutputTransport] = None - - # Register supported handlers. The user will only be able to register - # these handlers. - self._register_event_handler("on_app_message") - self._register_event_handler("on_client_connected") - self._register_event_handler("on_client_disconnected") - - def input(self) -> SmallWebRTCInputTransport: - """Get the input transport processor. - - Returns: - The input transport for handling incoming media streams. - """ - if not self._input: - self._input = SmallWebRTCInputTransport( - self._client, self._params, name=self._input_name - ) - return self._input - - def output(self) -> SmallWebRTCOutputTransport: - """Get the output transport processor. - - Returns: - The output transport for handling outgoing media streams. - """ - if not self._output: - self._output = SmallWebRTCOutputTransport( - self._client, self._params, name=self._input_name - ) - return self._output - - async def send_image(self, frame: OutputImageRawFrame | SpriteFrame): - """Send an image frame through the transport. - - Args: - frame: The image frame to send. - """ - if self._output: - await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - - async def send_audio(self, frame: OutputAudioRawFrame): - """Send an audio frame through the transport. - - Args: - frame: The audio frame to send. 
- """ - if self._output: - await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - - async def _on_app_message(self, message: Any): - """Handle incoming application messages.""" - if self._input: - await self._input.push_app_message(message) - await self._call_event_handler("on_app_message", message) - - async def _on_client_connected(self, webrtc_connection): - """Handle client connection events.""" - await self._call_event_handler("on_client_connected", webrtc_connection) - - async def _on_client_disconnected(self, webrtc_connection): - """Handle client disconnection events.""" - await self._call_event_handler("on_client_disconnected", webrtc_connection) - - async def capture_participant_video( - self, - video_source: str = CAM_VIDEO_SOURCE, - ): - """Capture video from a specific participant. - - Args: - video_source: Video source to capture from ("camera" or "screenVideo"). - """ - if self._input: - await self._input.capture_participant_media(source=video_source) - - async def capture_participant_audio( - self, - audio_source: str = MIC_AUDIO_SOURCE, - ): - """Capture audio from a specific participant. - - Args: - audio_source: Audio source to capture from. 
(currently, "microphone" is the only supported option) - """ - if self._input: - await self._input.capture_participant_media(source=audio_source) +import warnings + +from pipecat.transports.smallwebrtc.transport import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.network.small_webrtc` is deprecated, " + "use `pipecat.transports.smallwebrtc.transport` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/network/webrtc_connection.py b/src/pipecat/transports/network/webrtc_connection.py index 420656f85..7e6c1782d 100644 --- a/src/pipecat/transports/network/webrtc_connection.py +++ b/src/pipecat/transports/network/webrtc_connection.py @@ -11,602 +11,15 @@ with support for audio/video tracks, data channels, and signaling for real-time communication applications. """ -import asyncio -import json -import time -from typing import Any, List, Literal, Optional, Union +import warnings -from loguru import logger -from pydantic import BaseModel, TypeAdapter +from pipecat.transports.smallwebrtc.connection import * -from pipecat.utils.base_object import BaseObject - -try: - from aiortc import ( - MediaStreamTrack, - RTCConfiguration, - RTCIceServer, - RTCPeerConnection, - RTCSessionDescription, +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.network.webrtc_connection` is deprecated, " + "use `pipecat.transports.smallwebrtc.connection` instead.", + DeprecationWarning, + stacklevel=2, ) - from aiortc.rtcrtpreceiver import RemoteStreamTrack - from av.frame import Frame -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use the SmallWebRTC, you need to `pip install pipecat-ai[webrtc]`.") - raise Exception(f"Missing module: {e}") - -SIGNALLING_TYPE = "signalling" -AUDIO_TRANSCEIVER_INDEX = 0 -VIDEO_TRANSCEIVER_INDEX = 1 -SCREEN_VIDEO_TRANSCEIVER_INDEX = 2 - - -class 
TrackStatusMessage(BaseModel): - """Message for updating track enabled/disabled status. - - Parameters: - type: Message type identifier. - receiver_index: Index of the track receiver to update. - enabled: Whether the track should be enabled or disabled. - """ - - type: Literal["trackStatus"] - receiver_index: int - enabled: bool - - -class RenegotiateMessage(BaseModel): - """Message requesting WebRTC renegotiation. - - Parameters: - type: Message type identifier for renegotiation requests. - """ - - type: Literal["renegotiate"] = "renegotiate" - - -class PeerLeftMessage(BaseModel): - """Message indicating a peer has left the connection. - - Parameters: - type: Message type identifier for peer departure. - """ - - type: Literal["peerLeft"] = "peerLeft" - - -class SignallingMessage: - """Union types for signaling message handling. - - Parameters: - Inbound: Types of messages that can be received from peers. - outbound: Types of messages that can be sent to peers. - """ - - Inbound = Union[TrackStatusMessage] # in case we need to add new messages in the future - outbound = Union[RenegotiateMessage] - - -class SmallWebRTCTrack: - """Wrapper for WebRTC media tracks with enabled/disabled state management. - - Provides additional functionality on top of aiortc MediaStreamTrack including - enable/disable control and frame discarding for audio and video streams. - """ - - def __init__(self, track: MediaStreamTrack): - """Initialize the WebRTC track wrapper. - - Args: - track: The underlying MediaStreamTrack to wrap. - index: The index of the track in the transceiver (0 for mic, 1 for cam, 2 for screen) - """ - self._track = track - self._enabled = True - - def set_enabled(self, enabled: bool) -> None: - """Enable or disable the track. - - Args: - enabled: Whether the track should be enabled for receiving frames. - """ - self._enabled = enabled - - def is_enabled(self) -> bool: - """Check if the track is currently enabled. 
- - Returns: - True if the track is enabled for receiving frames. - """ - return self._enabled - - async def discard_old_frames(self): - """Discard old frames from the track queue to reduce latency.""" - remote_track = self._track - if isinstance(remote_track, RemoteStreamTrack): - if not hasattr(remote_track, "_queue") or not isinstance( - remote_track._queue, asyncio.Queue - ): - print("Warning: _queue does not exist or has changed in aiortc.") - return - logger.debug("Discarding old frames") - while not remote_track._queue.empty(): - remote_track._queue.get_nowait() # Remove the oldest frame - remote_track._queue.task_done() - - async def recv(self) -> Optional[Frame]: - """Receive the next frame from the track. - - Returns: - The next frame, except for video tracks, where it returns the frame only if the track is enabled, otherwise, returns None. - """ - if not self._enabled and self._track.kind == "video": - return None - return await self._track.recv() - - def __getattr__(self, name): - """Forward attribute access to the underlying track. - - Args: - name: The attribute name to access. - - Returns: - The attribute value from the underlying track. - """ - # Forward other attribute/method calls to the underlying track - return getattr(self._track, name) - - -# Alias so we don't need to expose RTCIceServer -IceServer = RTCIceServer - - -class SmallWebRTCConnection(BaseObject): - """WebRTC connection implementation using aiortc. - - Provides WebRTC peer connection functionality including ICE server configuration, - track management, data channel communication, and connection state handling - for real-time audio/video communication. - """ - - def __init__(self, ice_servers: Optional[Union[List[str], List[IceServer]]] = None): - """Initialize the WebRTC connection. - - Args: - ice_servers: List of ICE servers as URLs or IceServer objects. - - Raises: - TypeError: If ice_servers contains mixed types or unsupported types. 
- """ - super().__init__() - if not ice_servers: - self.ice_servers: List[IceServer] = [] - elif all(isinstance(s, IceServer) for s in ice_servers): - self.ice_servers = ice_servers - elif all(isinstance(s, str) for s in ice_servers): - self.ice_servers = [IceServer(urls=s) for s in ice_servers] - else: - raise TypeError("ice_servers must be either List[str] or List[RTCIceServer]") - self._connect_invoked = False - self._track_map = {} - self._track_getters = { - AUDIO_TRANSCEIVER_INDEX: self.audio_input_track, - VIDEO_TRANSCEIVER_INDEX: self.video_input_track, - SCREEN_VIDEO_TRANSCEIVER_INDEX: self.screen_video_input_track, - } - - self._initialize() - - # Register supported handlers. The user will only be able to register - # these handlers. - self._register_event_handler("app-message") - self._register_event_handler("track-started") - self._register_event_handler("track-ended") - # connection states - self._register_event_handler("connecting") - self._register_event_handler("connected") - self._register_event_handler("disconnected") - self._register_event_handler("closed") - self._register_event_handler("failed") - self._register_event_handler("new") - - @property - def pc(self) -> RTCPeerConnection: - """Get the underlying RTCPeerConnection. - - Returns: - The aiortc RTCPeerConnection instance. - """ - return self._pc - - @property - def pc_id(self) -> str: - """Get the peer connection identifier. - - Returns: - The unique identifier for this peer connection. 
- """ - return self._pc_id - - def _initialize(self): - """Initialize the peer connection and associated components.""" - logger.debug("Initializing new peer connection") - rtc_config = RTCConfiguration(iceServers=self.ice_servers) - - self._answer: Optional[RTCSessionDescription] = None - self._pc = RTCPeerConnection(rtc_config) - self._pc_id = self.name - self._setup_listeners() - self._data_channel = None - self._renegotiation_in_progress = False - self._last_received_time = None - self._message_queue = [] - self._pending_app_messages = [] - - def _setup_listeners(self): - """Set up event listeners for the peer connection.""" - - @self._pc.on("datachannel") - def on_datachannel(channel): - self._data_channel = channel - - # Flush queued messages once the data channel is open - @channel.on("open") - async def on_open(): - logger.debug("Data channel is open, flushing queued messages") - while self._message_queue: - message = self._message_queue.pop(0) - self._data_channel.send(message) - - @channel.on("message") - async def on_message(message): - try: - # aiortc does not provide any way so we can be aware when we are disconnected, - # so we are using this keep alive message as a way to implement that - if isinstance(message, str) and message.startswith("ping"): - self._last_received_time = time.time() - else: - json_message = json.loads(message) - if json_message["type"] == SIGNALLING_TYPE and json_message.get("message"): - self._handle_signalling_message(json_message["message"]) - else: - if self.is_connected(): - await self._call_event_handler("app-message", json_message) - else: - logger.debug("Client not connected. 
Queuing app-message.") - self._pending_app_messages.append(json_message) - except Exception as e: - logger.exception(f"Error parsing JSON message {message}, {e}") - - # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" - # So, in case we loose connection, this event will not be triggered - @self._pc.on("connectionstatechange") - async def on_connectionstatechange(): - await self._handle_new_connection_state() - - # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" - # So, in case we loose connection, this event will not be triggered - @self._pc.on("iceconnectionstatechange") - async def on_iceconnectionstatechange(): - logger.debug( - f"ICE connection state is {self._pc.iceConnectionState}, connection is {self._pc.connectionState}" - ) - - @self._pc.on("icegatheringstatechange") - async def on_icegatheringstatechange(): - logger.debug(f"ICE gathering state is {self._pc.iceGatheringState}") - - @self._pc.on("track") - async def on_track(track): - logger.debug(f"Track {track.kind} received") - await self._call_event_handler("track-started", track) - - @track.on("ended") - async def on_ended(): - logger.debug(f"Track {track.kind} ended") - await self._call_event_handler("track-ended", track) - - async def _create_answer(self, sdp: str, type: str): - """Create an SDP answer for the given offer.""" - offer = RTCSessionDescription(sdp=sdp, type=type) - await self._pc.setRemoteDescription(offer) - - # For some reason, aiortc is not respecting the SDP for the transceivers to be sendrcv - # so we are basically forcing it to act this way - self.force_transceivers_to_send_recv() - - # this answer does not contain the ice candidates, which will be gathered later, after the setLocalDescription - logger.debug(f"Creating answer") - local_answer = await self._pc.createAnswer() - await self._pc.setLocalDescription(local_answer) - logger.debug(f"Setting the answer after the local 
description is created") - self._answer = self._pc.localDescription - - async def initialize(self, sdp: str, type: str): - """Initialize the connection with an SDP offer. - - Args: - sdp: The SDP offer string. - type: The SDP type (usually "offer"). - """ - await self._create_answer(sdp, type) - - async def connect(self): - """Connect the WebRTC peer connection and handle initial setup.""" - self._connect_invoked = True - # If we already connected, trigger again the connected event - if self.is_connected(): - await self._call_event_handler("connected") - logger.debug("Flushing pending app-messages") - for message in self._pending_app_messages: - await self._call_event_handler("app-message", message) - # We are renegotiating here, because likely we have loose the first video frames - # and aiortc does not handle that pretty well. - video_input_track = self.video_input_track() - if video_input_track: - await self.video_input_track().discard_old_frames() - screen_video_input_track = self.screen_video_input_track() - if screen_video_input_track: - await self.screen_video_input_track().discard_old_frames() - if video_input_track or screen_video_input_track: - # This prevents an issue where sometimes the WebRTC connection can be established - # before the bot is ready to receive video. When that happens, we can lose a couple - # of seconds of video before we received a key frame to finally start displaying it. - self.ask_to_renegotiate() - - async def renegotiate(self, sdp: str, type: str, restart_pc: bool = False): - """Renegotiate the WebRTC connection with new parameters. - - Args: - sdp: The new SDP offer string. - type: The SDP type (usually "offer"). - restart_pc: Whether to restart the peer connection entirely. 
- """ - logger.debug(f"Renegotiating {self._pc_id}") - - if restart_pc: - await self._call_event_handler("disconnected") - logger.debug("Closing old peer connection") - # removing the listeners to prevent the bot from closing - self._pc.remove_all_listeners() - await self._close() - # we are initializing a new peer connection in this case. - self._initialize() - - await self._create_answer(sdp, type) - - # Maybe we should refactor to receive a message from the client side when the renegotiation is completed. - # or look at the peer connection listeners - # but this is good enough for now for testing. - async def delayed_task(): - await asyncio.sleep(2) - self._renegotiation_in_progress = False - - asyncio.create_task(delayed_task()) - - def force_transceivers_to_send_recv(self): - """Force all transceivers to bidirectional send/receive mode.""" - transceivers = self._pc.getTransceivers() - # For now, we only support sendrecv for camera audio and video (the first two transceivers) - for i, transceiver in enumerate(transceivers): - if i < 2: # First two transceivers (camera audio and video) - transceiver.direction = "sendrecv" - else: - transceiver.direction = "recvonly" - # logger.debug( - # f"Transceiver: {transceiver}, Mid: {transceiver.mid}, Direction: {transceiver.direction}" - # ) - # logger.debug(f"Sender track: {transceiver.sender.track}") - - def replace_audio_track(self, track): - """Replace the audio track in the first transceiver. - - Args: - track: The new audio track to use for sending. - """ - logger.debug(f"Replacing audio track {track.kind}") - # Transceivers always appear in creation-order for both peers - # For now we are only considering that we are going to have 02 transceivers, - # one for audio and one for video - transceivers = self._pc.getTransceivers() - if len(transceivers) > 0 and transceivers[0].sender: - transceivers[0].sender.replaceTrack(track) - else: - logger.warning("Audio transceiver not found. 
Cannot replace audio track.") - - def replace_video_track(self, track): - """Replace the video track in the second transceiver. - - Args: - track: The new video track to use for sending. - """ - logger.debug(f"Replacing video track {track.kind}") - # Transceivers always appear in creation-order for both peers - # For now we are only considering that we are going to have 02 transceivers, - # one for audio and one for video - transceivers = self._pc.getTransceivers() - if len(transceivers) > 1 and transceivers[1].sender: - transceivers[1].sender.replaceTrack(track) - else: - logger.warning("Video transceiver not found. Cannot replace video track.") - - def replace_screen_video_track(self, track): - """Replace the screen video track in the second transceiver. - - Args: - track: The new screen video track to use for sending. - """ - logger.debug(f"Replacing screen video track {track.kind}") - # Transceivers always appear in creation-order for both peers - # For now we are only considering that we are going to have 02 transceivers, - # one for audio and one for video - transceivers = self._pc.getTransceivers() - if len(transceivers) > 2 and transceivers[2].sender: - transceivers[2].sender.replaceTrack(track) - else: - logger.warning("Screen video transceiver not found. Cannot replace screen video track.") - - async def disconnect(self): - """Disconnect from the WebRTC peer connection.""" - self.send_app_message({"type": SIGNALLING_TYPE, "message": PeerLeftMessage().model_dump()}) - await self._close() - - async def _close(self): - """Close the peer connection and cleanup resources.""" - if self._pc: - await self._pc.close() - self._message_queue.clear() - self._pending_app_messages.clear() - self._track_map = {} - - def get_answer(self): - """Get the SDP answer for the current connection. - - Returns: - Dictionary containing SDP answer, type, and peer connection ID, - or None if no answer is available. 
- """ - if not self._answer: - return None - - return { - "sdp": self._answer.sdp, - "type": self._answer.type, - "pc_id": self._pc_id, - } - - async def _handle_new_connection_state(self): - """Handle changes in the peer connection state.""" - state = self._pc.connectionState - if state == "connected" and not self._connect_invoked: - # We are going to wait until the pipeline is ready before triggering the event - return - logger.debug(f"Connection state changed to: {state}") - await self._call_event_handler(state) - if state == "failed": - logger.warning("Connection failed, closing peer connection.") - await self._close() - - # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" - # So, there is no advantage in looking at self._pc.connectionState - # That is why we are trying to keep our own state - def is_connected(self) -> bool: - """Check if the WebRTC connection is currently active. - - Returns: - True if the connection is active and receiving data. - """ - # If the small webrtc transport has never invoked to connect - # we are acting like if we are not connected - if not self._connect_invoked: - return False - - if self._last_received_time is None: - # if we have never received a message, it is probably because the client has not created a data channel - # so we are going to trust aiortc in this case - return self._pc.connectionState == "connected" - # Checks if the last received ping was within the last 3 seconds. - return (time.time() - self._last_received_time) < 3 - - def audio_input_track(self): - """Get the audio input track wrapper. - - Returns: - SmallWebRTCTrack wrapper for the audio track, or None if unavailable. 
- """ - if self._track_map.get(AUDIO_TRANSCEIVER_INDEX): - return self._track_map[AUDIO_TRANSCEIVER_INDEX] - - # Transceivers always appear in creation-order for both peers - # For support 3 receivers in the following order: - # audio, video, screenVideo - transceivers = self._pc.getTransceivers() - if len(transceivers) == 0 or not transceivers[AUDIO_TRANSCEIVER_INDEX].receiver: - logger.warning("No audio transceiver is available") - return None - - track = transceivers[AUDIO_TRANSCEIVER_INDEX].receiver.track - audio_track = SmallWebRTCTrack(track) if track else None - self._track_map[AUDIO_TRANSCEIVER_INDEX] = audio_track - return audio_track - - def video_input_track(self): - """Get the video input track wrapper. - - Returns: - SmallWebRTCTrack wrapper for the video track, or None if unavailable. - """ - if self._track_map.get(VIDEO_TRANSCEIVER_INDEX): - return self._track_map[VIDEO_TRANSCEIVER_INDEX] - - # Transceivers always appear in creation-order for both peers - # For support 3 receivers in the following order: - # audio, video, screenVideo - transceivers = self._pc.getTransceivers() - if len(transceivers) <= 1 or not transceivers[VIDEO_TRANSCEIVER_INDEX].receiver: - logger.warning("No video transceiver is available") - return None - - track = transceivers[VIDEO_TRANSCEIVER_INDEX].receiver.track - video_track = SmallWebRTCTrack(track) if track else None - self._track_map[VIDEO_TRANSCEIVER_INDEX] = video_track - return video_track - - def screen_video_input_track(self): - """Get the screen video input track wrapper. - - Returns: - SmallWebRTCTrack wrapper for the screen video track, or None if unavailable. 
- """ - if self._track_map.get(SCREEN_VIDEO_TRANSCEIVER_INDEX): - return self._track_map[SCREEN_VIDEO_TRANSCEIVER_INDEX] - - # Transceivers always appear in creation-order for both peers - # For support 3 receivers in the following order: - # audio, video, screenVideo - transceivers = self._pc.getTransceivers() - if len(transceivers) <= 2 or not transceivers[SCREEN_VIDEO_TRANSCEIVER_INDEX].receiver: - logger.warning("No screen video transceiver is available") - return None - - track = transceivers[SCREEN_VIDEO_TRANSCEIVER_INDEX].receiver.track - video_track = SmallWebRTCTrack(track) if track else None - self._track_map[SCREEN_VIDEO_TRANSCEIVER_INDEX] = video_track - return video_track - - def send_app_message(self, message: Any): - """Send an application message through the data channel. - - Args: - message: The message to send (will be JSON serialized). - """ - json_message = json.dumps(message) - if self._data_channel and self._data_channel.readyState == "open": - self._data_channel.send(json_message) - else: - logger.debug("Data channel not ready, queuing message") - self._message_queue.append(json_message) - - def ask_to_renegotiate(self): - """Request renegotiation of the WebRTC connection.""" - if self._renegotiation_in_progress: - return - - self._renegotiation_in_progress = True - self.send_app_message( - {"type": SIGNALLING_TYPE, "message": RenegotiateMessage().model_dump()} - ) - - def _handle_signalling_message(self, message): - """Handle incoming signaling messages.""" - logger.debug(f"Signalling message received: {message}") - inbound_adapter = TypeAdapter(SignallingMessage.Inbound) - signalling_message = inbound_adapter.validate_python(message) - match signalling_message: - case TrackStatusMessage(): - track = ( - self._track_getters.get(signalling_message.receiver_index) or (lambda: None) - )() - if track: - track.set_enabled(signalling_message.enabled) diff --git a/src/pipecat/transports/network/websocket_client.py 
b/src/pipecat/transports/network/websocket_client.py index d141b52f3..4ce22a68d 100644 --- a/src/pipecat/transports/network/websocket_client.py +++ b/src/pipecat/transports/network/websocket_client.py @@ -11,484 +11,15 @@ communication over WebSocket connections, with support for audio streaming, frame serialization, and connection management. """ -import asyncio -import io -import time -import wave -from typing import Awaitable, Callable, Optional - -import websockets -from loguru import logger -from pydantic.main import BaseModel -from websockets.asyncio.client import connect as websocket_connect - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - Frame, - InputAudioRawFrame, - OutputAudioRawFrame, - StartFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, -) -from pipecat.processors.frame_processor import FrameProcessorSetup -from pipecat.serializers.base_serializer import FrameSerializer -from pipecat.serializers.protobuf import ProtobufFrameSerializer -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.utils.asyncio.task_manager import BaseTaskManager - - -class WebsocketClientParams(TransportParams): - """Configuration parameters for WebSocket client transport. - - Parameters: - add_wav_header: Whether to add WAV headers to audio frames. - serializer: Frame serializer for encoding/decoding messages. - """ - - add_wav_header: bool = True - serializer: Optional[FrameSerializer] = None - - -class WebsocketClientCallbacks(BaseModel): - """Callback functions for WebSocket client events. - - Parameters: - on_connected: Called when WebSocket connection is established. - on_disconnected: Called when WebSocket connection is closed. - on_message: Called when a message is received from the WebSocket. 
- """ - - on_connected: Callable[[websockets.WebSocketClientProtocol], Awaitable[None]] - on_disconnected: Callable[[websockets.WebSocketClientProtocol], Awaitable[None]] - on_message: Callable[[websockets.WebSocketClientProtocol, websockets.Data], Awaitable[None]] - - -class WebsocketClientSession: - """Manages a WebSocket client connection session. - - Handles connection lifecycle, message sending/receiving, and provides - callback mechanisms for connection events. - """ - - def __init__( - self, - uri: str, - params: WebsocketClientParams, - callbacks: WebsocketClientCallbacks, - transport_name: str, - ): - """Initialize the WebSocket client session. - - Args: - uri: The WebSocket URI to connect to. - params: Configuration parameters for the session. - callbacks: Callback functions for session events. - transport_name: Name of the parent transport for logging. - """ - self._uri = uri - self._params = params - self._callbacks = callbacks - self._transport_name = transport_name - - self._leave_counter = 0 - self._task_manager: Optional[BaseTaskManager] = None - self._websocket: Optional[websockets.WebSocketClientProtocol] = None - - @property - def task_manager(self) -> BaseTaskManager: - """Get the task manager for this session. - - Returns: - The task manager instance. - - Raises: - Exception: If task manager is not initialized. - """ - if not self._task_manager: - raise Exception( - f"{self._transport_name}::WebsocketClientSession: TaskManager not initialized (pipeline not started?)" - ) - return self._task_manager - - async def setup(self, task_manager: BaseTaskManager): - """Set up the session with a task manager. - - Args: - task_manager: The task manager to use for session tasks. 
- """ - self._leave_counter += 1 - if not self._task_manager: - self._task_manager = task_manager - - async def connect(self): - """Connect to the WebSocket server.""" - if self._websocket: - return - - try: - self._websocket = await websocket_connect(uri=self._uri, open_timeout=10) - self._client_task = self.task_manager.create_task( - self._client_task_handler(), - f"{self._transport_name}::WebsocketClientSession::_client_task_handler", - ) - await self._callbacks.on_connected(self._websocket) - except TimeoutError: - logger.error(f"Timeout connecting to {self._uri}") - - async def disconnect(self): - """Disconnect from the WebSocket server.""" - self._leave_counter -= 1 - if not self._websocket or self._leave_counter > 0: - return - - await self.task_manager.cancel_task(self._client_task) - - await self._websocket.close() - self._websocket = None - - async def send(self, message: websockets.Data): - """Send a message through the WebSocket connection. - - Args: - message: The message data to send. - """ - try: - if self._websocket: - await self._websocket.send(message) - except Exception as e: - logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})") - - async def _client_task_handler(self): - """Handle incoming messages from the WebSocket connection.""" - try: - # Handle incoming messages - async for message in self._websocket: - await self._callbacks.on_message(self._websocket, message) - except Exception as e: - logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") - - await self._callbacks.on_disconnected(self._websocket) - - def __str__(self): - """String representation of the WebSocket client session.""" - return f"{self._transport_name}::WebsocketClientSession" - - -class WebsocketClientInputTransport(BaseInputTransport): - """WebSocket client input transport for receiving frames. - - Handles incoming WebSocket messages, deserializes them to frames, - and pushes them downstream in the processing pipeline. 
- """ - - def __init__( - self, - transport: BaseTransport, - session: WebsocketClientSession, - params: WebsocketClientParams, - ): - """Initialize the WebSocket client input transport. - - Args: - transport: The parent transport instance. - session: The WebSocket session to use for communication. - params: Configuration parameters for the transport. - """ - super().__init__(params) - - self._transport = transport - self._session = session - self._params = params - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def setup(self, setup: FrameProcessorSetup): - """Set up the input transport with the frame processor setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._session.setup(setup.task_manager) - - async def start(self, frame: StartFrame): - """Start the input transport and initialize the WebSocket connection. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - if self._params.serializer: - await self._params.serializer.setup(frame) - await self._session.connect() - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the input transport and disconnect from WebSocket. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._session.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport and disconnect from WebSocket. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._session.disconnect() - - async def cleanup(self): - """Clean up the input transport resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def on_message(self, websocket, message): - """Handle incoming WebSocket messages. 
- - Args: - websocket: The WebSocket connection that received the message. - message: The received message data. - """ - if not self._params.serializer: - return - frame = await self._params.serializer.deserialize(message) - if not frame: - return - if isinstance(frame, InputAudioRawFrame) and self._params.audio_in_enabled: - await self.push_audio_frame(frame) - else: - await self.push_frame(frame) - - -class WebsocketClientOutputTransport(BaseOutputTransport): - """WebSocket client output transport for sending frames. - - Handles outgoing frames, serializes them for WebSocket transmission, - and manages audio streaming with proper timing simulation. - """ - - def __init__( - self, - transport: BaseTransport, - session: WebsocketClientSession, - params: WebsocketClientParams, - ): - """Initialize the WebSocket client output transport. - - Args: - transport: The parent transport instance. - session: The WebSocket session to use for communication. - params: Configuration parameters for the transport. - """ - super().__init__(params) - - self._transport = transport - self._session = session - self._params = params - - # write_audio_frame() is called quickly, as soon as we get audio - # (e.g. from the TTS), and since this is just a network connection we - # would be sending it to quickly. Instead, we want to block to emulate - # an audio device, this is what the send interval is. It will be - # computed on StartFrame. - self._send_interval = 0 - self._next_send_time = 0 - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def setup(self, setup: FrameProcessorSetup): - """Set up the output transport with the frame processor setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._session.setup(setup.task_manager) - - async def start(self, frame: StartFrame): - """Start the output transport and initialize the WebSocket connection. 
- - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 - if self._params.serializer: - await self._params.serializer.setup(frame) - await self._session.connect() - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport and disconnect from WebSocket. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._session.disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and disconnect from WebSocket. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._session.disconnect() - - async def cleanup(self): - """Clean up the output transport resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message through the WebSocket. - - Args: - frame: The transport message frame to send. - """ - await self._write_frame(frame) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the WebSocket with optional WAV header. - - Args: - frame: The output audio frame to write. 
- """ - frame = OutputAudioRawFrame( - audio=frame.audio, - sample_rate=self.sample_rate, - num_channels=self._params.audio_out_channels, - ) - - if self._params.add_wav_header: - with io.BytesIO() as buffer: - with wave.open(buffer, "wb") as wf: - wf.setsampwidth(2) - wf.setnchannels(frame.num_channels) - wf.setframerate(frame.sample_rate) - wf.writeframes(frame.audio) - wav_frame = OutputAudioRawFrame( - buffer.getvalue(), - sample_rate=frame.sample_rate, - num_channels=frame.num_channels, - ) - frame = wav_frame - - await self._write_frame(frame) - - # Simulate audio playback with a sleep. - await self._write_audio_sleep() - - async def _write_frame(self, frame: Frame): - """Write a frame to the WebSocket after serialization.""" - if not self._params.serializer: - return - payload = await self._params.serializer.serialize(frame) - if payload: - await self._session.send(payload) - - async def _write_audio_sleep(self): - """Simulate audio playback timing with sleep delays.""" - # Simulate a clock. - current_time = time.monotonic() - sleep_duration = max(0, self._next_send_time - current_time) - await asyncio.sleep(sleep_duration) - if sleep_duration == 0: - self._next_send_time = time.monotonic() + self._send_interval - else: - self._next_send_time += self._send_interval - - -class WebsocketClientTransport(BaseTransport): - """WebSocket client transport for bidirectional communication. - - Provides a complete WebSocket client transport implementation with - input and output capabilities, connection management, and event handling. - """ - - def __init__( - self, - uri: str, - params: Optional[WebsocketClientParams] = None, - ): - """Initialize the WebSocket client transport. - - Args: - uri: The WebSocket URI to connect to. - params: Optional configuration parameters for the transport. 
- """ - super().__init__() - - self._params = params or WebsocketClientParams() - self._params.serializer = self._params.serializer or ProtobufFrameSerializer() - - callbacks = WebsocketClientCallbacks( - on_connected=self._on_connected, - on_disconnected=self._on_disconnected, - on_message=self._on_message, - ) - - self._session = WebsocketClientSession(uri, self._params, callbacks, self.name) - self._input: Optional[WebsocketClientInputTransport] = None - self._output: Optional[WebsocketClientOutputTransport] = None - - # Register supported handlers. The user will only be able to register - # these handlers. - self._register_event_handler("on_connected") - self._register_event_handler("on_disconnected") - - def input(self) -> WebsocketClientInputTransport: - """Get the input transport for receiving frames. - - Returns: - The WebSocket client input transport instance. - """ - if not self._input: - self._input = WebsocketClientInputTransport(self, self._session, self._params) - return self._input - - def output(self) -> WebsocketClientOutputTransport: - """Get the output transport for sending frames. - - Returns: - The WebSocket client output transport instance. 
- """ - if not self._output: - self._output = WebsocketClientOutputTransport(self, self._session, self._params) - return self._output - - async def _on_connected(self, websocket): - """Handle WebSocket connection established event.""" - await self._call_event_handler("on_connected", websocket) - - async def _on_disconnected(self, websocket): - """Handle WebSocket connection closed event.""" - await self._call_event_handler("on_disconnected", websocket) - - async def _on_message(self, websocket, message): - """Handle incoming WebSocket message.""" - if self._input: - await self._input.on_message(websocket, message) +import warnings + +from pipecat.transports.websocket.client import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.network.websocket_client` is deprecated, " + "use `pipecat.transports.websocket.client` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/network/websocket_server.py b/src/pipecat/transports/network/websocket_server.py index 8e73fb47e..02ffda3e5 100644 --- a/src/pipecat/transports/network/websocket_server.py +++ b/src/pipecat/transports/network/websocket_server.py @@ -11,490 +11,15 @@ audio and data streaming, including client connection management, session handling, and frame serialization. 
""" -import asyncio -import io -import time -import wave -from typing import Awaitable, Callable, Optional - -from loguru import logger -from pydantic import BaseModel - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - Frame, - InputAudioRawFrame, - OutputAudioRawFrame, - StartFrame, - StartInterruptionFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.serializers.base_serializer import FrameSerializer -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams - -try: - import websockets - from websockets.asyncio.server import serve as websocket_serve - from websockets.protocol import State -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use websockets, you need to `pip install pipecat-ai[websocket]`.") - raise Exception(f"Missing module: {e}") - - -class WebsocketServerParams(TransportParams): - """Configuration parameters for WebSocket server transport. - - Parameters: - add_wav_header: Whether to add WAV headers to audio frames. - serializer: Frame serializer for message encoding/decoding. - session_timeout: Timeout in seconds for client sessions. - """ - - add_wav_header: bool = False - serializer: Optional[FrameSerializer] = None - session_timeout: Optional[int] = None - - -class WebsocketServerCallbacks(BaseModel): - """Callback functions for WebSocket server events. - - Parameters: - on_client_connected: Called when a client connects to the server. - on_client_disconnected: Called when a client disconnects from the server. - on_session_timeout: Called when a client session times out. - on_websocket_ready: Called when the WebSocket server is ready to accept connections. 
- """ - - on_client_connected: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]] - on_client_disconnected: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]] - on_session_timeout: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]] - on_websocket_ready: Callable[[], Awaitable[None]] - - -class WebsocketServerInputTransport(BaseInputTransport): - """WebSocket server input transport for receiving client data. - - Handles incoming WebSocket connections, message processing, and client - session management including timeout monitoring and connection lifecycle. - """ - - def __init__( - self, - transport: BaseTransport, - host: str, - port: int, - params: WebsocketServerParams, - callbacks: WebsocketServerCallbacks, - **kwargs, - ): - """Initialize the WebSocket server input transport. - - Args: - transport: The parent transport instance. - host: Host address to bind the WebSocket server to. - port: Port number to bind the WebSocket server to. - params: WebSocket server configuration parameters. - callbacks: Callback functions for WebSocket events. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - - self._transport = transport - self._host = host - self._port = port - self._params = params - self._callbacks = callbacks - - self._websocket: Optional[websockets.WebSocketServerProtocol] = None - - self._server_task = None - - # This task will monitor the websocket connection periodically. - self._monitor_task = None - - self._stop_server_event = asyncio.Event() - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def start(self, frame: StartFrame): - """Start the WebSocket server and initialize components. - - Args: - frame: The start frame containing initialization parameters. 
- """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - if self._params.serializer: - await self._params.serializer.setup(frame) - if not self._server_task: - self._server_task = self.create_task(self._server_task_handler()) - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the WebSocket server and cleanup resources. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - self._stop_server_event.set() - if self._monitor_task: - await self.cancel_task(self._monitor_task) - self._monitor_task = None - if self._server_task: - await self._server_task - self._server_task = None - - async def cancel(self, frame: CancelFrame): - """Cancel the WebSocket server and stop all processing. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - if self._monitor_task: - await self.cancel_task(self._monitor_task) - self._monitor_task = None - if self._server_task: - await self.cancel_task(self._server_task) - self._server_task = None - - async def cleanup(self): - """Cleanup resources and parent transport.""" - await super().cleanup() - await self._transport.cleanup() - - async def _server_task_handler(self): - """Handle WebSocket server startup and client connections.""" - logger.info(f"Starting websocket server on {self._host}:{self._port}") - async with websocket_serve(self._client_handler, self._host, self._port) as server: - await self._callbacks.on_websocket_ready() - await self._stop_server_event.wait() - - async def _client_handler(self, websocket: websockets.WebSocketServerProtocol): - """Handle individual client connections and message processing.""" - logger.info(f"New client connection from {websocket.remote_address}") - if self._websocket: - await self._websocket.close() - logger.warning("Only one client connected, using new connection") - - self._websocket = websocket - - # Notify - 
await self._callbacks.on_client_connected(websocket) - - # Create a task to monitor the websocket connection - if not self._monitor_task and self._params.session_timeout: - self._monitor_task = self.create_task( - self._monitor_websocket(websocket, self._params.session_timeout) - ) - - # Handle incoming messages - try: - async for message in websocket: - if not self._params.serializer: - continue - - frame = await self._params.serializer.deserialize(message) - - if not frame: - continue - - if isinstance(frame, InputAudioRawFrame): - await self.push_audio_frame(frame) - else: - await self.push_frame(frame) - except Exception as e: - logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") - - # Notify disconnection - await self._callbacks.on_client_disconnected(websocket) - - await self._websocket.close() - self._websocket = None - - logger.info(f"Client {websocket.remote_address} disconnected") - - async def _monitor_websocket( - self, websocket: websockets.WebSocketServerProtocol, session_timeout: int - ): - """Monitor WebSocket connection for session timeout.""" - try: - await asyncio.sleep(session_timeout) - if websocket.state is not State.CLOSED: - await self._callbacks.on_session_timeout(websocket) - except asyncio.CancelledError: - logger.info(f"Monitoring task cancelled for: {websocket.remote_address}") - raise - - -class WebsocketServerOutputTransport(BaseOutputTransport): - """WebSocket server output transport for sending data to clients. - - Handles outgoing frame serialization, audio streaming with timing control, - and client connection management for WebSocket communication. - """ - - def __init__(self, transport: BaseTransport, params: WebsocketServerParams, **kwargs): - """Initialize the WebSocket server output transport. - - Args: - transport: The parent transport instance. - params: WebSocket server configuration parameters. - **kwargs: Additional arguments passed to parent class. 
- """ - super().__init__(params, **kwargs) - - self._transport = transport - self._params = params - - self._websocket: Optional[websockets.WebSocketServerProtocol] = None - - # write_audio_frame() is called quickly, as soon as we get audio - # (e.g. from the TTS), and since this is just a network connection we - # would be sending it to quickly. Instead, we want to block to emulate - # an audio device, this is what the send interval is. It will be - # computed on StartFrame. - self._send_interval = 0 - self._next_send_time = 0 - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def set_client_connection(self, websocket: Optional[websockets.WebSocketServerProtocol]): - """Set the active client WebSocket connection. - - Args: - websocket: The WebSocket connection to set as active, or None to clear. - """ - if self._websocket: - await self._websocket.close() - logger.warning("Only one client allowed, using new connection") - self._websocket = websocket - - async def start(self, frame: StartFrame): - """Start the output transport and initialize components. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - if self._params.serializer: - await self._params.serializer.setup(frame) - self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport and send final frame. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._write_frame(frame) - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and send cancellation frame. - - Args: - frame: The cancel frame signaling immediate cancellation. 
- """ - await super().cancel(frame) - await self._write_frame(frame) - - async def cleanup(self): - """Cleanup resources and parent transport.""" - await super().cleanup() - await self._transport.cleanup() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames and handle interruption timing. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, StartInterruptionFrame): - await self._write_frame(frame) - self._next_send_time = 0 - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message frame to the client. - - Args: - frame: The transport message frame to send. - """ - await self._write_frame(frame) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the WebSocket client with timing control. - - Args: - frame: The output audio frame to write. - """ - if not self._websocket: - return - - frame = OutputAudioRawFrame( - audio=frame.audio, - sample_rate=self.sample_rate, - num_channels=self._params.audio_out_channels, - ) - - if self._params.add_wav_header: - with io.BytesIO() as buffer: - with wave.open(buffer, "wb") as wf: - wf.setsampwidth(2) - wf.setnchannels(frame.num_channels) - wf.setframerate(frame.sample_rate) - wf.writeframes(frame.audio) - wav_frame = OutputAudioRawFrame( - buffer.getvalue(), - sample_rate=frame.sample_rate, - num_channels=frame.num_channels, - ) - frame = wav_frame - - await self._write_frame(frame) - - # Simulate audio playback with a sleep. 
- await self._write_audio_sleep() - - async def _write_frame(self, frame: Frame): - """Serialize and send a frame to the WebSocket client.""" - if not self._params.serializer: - return - - try: - payload = await self._params.serializer.serialize(frame) - if payload and self._websocket: - await self._websocket.send(payload) - except Exception as e: - logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})") - - async def _write_audio_sleep(self): - """Simulate audio device timing by sleeping between audio chunks.""" - # Simulate a clock. - current_time = time.monotonic() - sleep_duration = max(0, self._next_send_time - current_time) - await asyncio.sleep(sleep_duration) - if sleep_duration == 0: - self._next_send_time = time.monotonic() + self._send_interval - else: - self._next_send_time += self._send_interval - - -class WebsocketServerTransport(BaseTransport): - """WebSocket server transport for bidirectional real-time communication. - - Provides a complete WebSocket server implementation with separate input and - output transports, client connection management, and event handling for - real-time audio and data streaming applications. - """ - - def __init__( - self, - params: WebsocketServerParams, - host: str = "localhost", - port: int = 8765, - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the WebSocket server transport. - - Args: - params: WebSocket server configuration parameters. - host: Host address to bind the server to. Defaults to "localhost". - port: Port number to bind the server to. Defaults to 8765. - input_name: Optional name for the input processor. - output_name: Optional name for the output processor. 
- """ - super().__init__(input_name=input_name, output_name=output_name) - self._host = host - self._port = port - self._params = params - - self._callbacks = WebsocketServerCallbacks( - on_client_connected=self._on_client_connected, - on_client_disconnected=self._on_client_disconnected, - on_session_timeout=self._on_session_timeout, - on_websocket_ready=self._on_websocket_ready, - ) - self._input: Optional[WebsocketServerInputTransport] = None - self._output: Optional[WebsocketServerOutputTransport] = None - self._websocket: Optional[websockets.WebSocketServerProtocol] = None - - # Register supported handlers. The user will only be able to register - # these handlers. - self._register_event_handler("on_client_connected") - self._register_event_handler("on_client_disconnected") - self._register_event_handler("on_session_timeout") - self._register_event_handler("on_websocket_ready") - - def input(self) -> WebsocketServerInputTransport: - """Get the input transport for receiving client data. - - Returns: - The WebSocket server input transport instance. - """ - if not self._input: - self._input = WebsocketServerInputTransport( - self, self._host, self._port, self._params, self._callbacks, name=self._input_name - ) - return self._input - - def output(self) -> WebsocketServerOutputTransport: - """Get the output transport for sending data to clients. - - Returns: - The WebSocket server output transport instance. 
- """ - if not self._output: - self._output = WebsocketServerOutputTransport( - self, self._params, name=self._output_name - ) - return self._output - - async def _on_client_connected(self, websocket): - """Handle client connection events.""" - if self._output: - await self._output.set_client_connection(websocket) - await self._call_event_handler("on_client_connected", websocket) - else: - logger.error("A WebsocketServerTransport output is missing in the pipeline") - - async def _on_client_disconnected(self, websocket): - """Handle client disconnection events.""" - if self._output: - await self._output.set_client_connection(None) - await self._call_event_handler("on_client_disconnected", websocket) - else: - logger.error("A WebsocketServerTransport output is missing in the pipeline") - - async def _on_session_timeout(self, websocket): - """Handle client session timeout events.""" - await self._call_event_handler("on_session_timeout", websocket) - - async def _on_websocket_ready(self): - """Handle WebSocket server ready events.""" - await self._call_event_handler("on_websocket_ready") +import warnings + +from pipecat.transports.websocket.server import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.network.websocket_server` is deprecated, " + "use `pipecat.transports.websocket.server` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/services/daily.py b/src/pipecat/transports/services/daily.py index cfc7998ef..02fdb2930 100644 --- a/src/pipecat/transports/services/daily.py +++ b/src/pipecat/transports/services/daily.py @@ -11,2328 +11,15 @@ audio/video streaming, transcription, recording, dial-in/out functionality, and real-time communication features. 
""" -import asyncio -import time -from concurrent.futures import CancelledError as FuturesCancelledError -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass -from typing import Any, Awaitable, Callable, Dict, Mapping, Optional +import warnings -import aiohttp -from loguru import logger -from pydantic import BaseModel +from pipecat.transports.daily.transport import * -from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - ErrorFrame, - Frame, - InputAudioRawFrame, - InterimTranscriptionFrame, - OutputAudioRawFrame, - OutputImageRawFrame, - SpriteFrame, - StartFrame, - TranscriptionFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, - UserAudioRawFrame, - UserImageRawFrame, - UserImageRequestFrame, -) -from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup -from pipecat.transcriptions.language import Language -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.utils.asyncio.task_manager import BaseTaskManager - -try: - from daily import ( - AudioData, - CallClient, - CustomAudioSource, - CustomAudioTrack, - Daily, - EventHandler, - VideoFrame, - VirtualCameraDevice, - VirtualSpeakerDevice, +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.services.daily` is deprecated, " + "use `pipecat.transports.daily.transport` instead.", + DeprecationWarning, + stacklevel=2, ) - from daily import ( - LogLevel as DailyLogLevel, - ) -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error( - "In order to use the Daily transport, you need to `pip install pipecat-ai[daily]`." 
- ) - raise Exception(f"Missing module: {e}") - -VAD_RESET_PERIOD_MS = 2000 - - -@dataclass -class DailyTransportMessageFrame(TransportMessageFrame): - """Frame for transport messages in Daily calls. - - Parameters: - participant_id: Optional ID of the participant this message is for/from. - """ - - participant_id: Optional[str] = None - - -@dataclass -class DailyTransportMessageUrgentFrame(TransportMessageUrgentFrame): - """Frame for urgent transport messages in Daily calls. - - Parameters: - participant_id: Optional ID of the participant this message is for/from. - """ - - participant_id: Optional[str] = None - - -class WebRTCVADAnalyzer(VADAnalyzer): - """Voice Activity Detection analyzer using WebRTC. - - Implements voice activity detection using Daily's native WebRTC VAD. - """ - - def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None): - """Initialize the WebRTC VAD analyzer. - - Args: - sample_rate: Audio sample rate in Hz. - params: VAD configuration parameters. - """ - super().__init__(sample_rate=sample_rate, params=params) - - self._webrtc_vad = Daily.create_native_vad( - reset_period_ms=VAD_RESET_PERIOD_MS, sample_rate=self.sample_rate, channels=1 - ) - logger.debug("Loaded native WebRTC VAD") - - def num_frames_required(self) -> int: - """Get the number of audio frames required for VAD analysis. - - Returns: - The number of frames needed (equivalent to 10ms of audio). - """ - return int(self.sample_rate / 100.0) - - def voice_confidence(self, buffer) -> float: - """Analyze audio buffer and return voice confidence score. - - Args: - buffer: Audio buffer to analyze. - - Returns: - Voice confidence score between 0.0 and 1.0. - """ - confidence = 0 - if len(buffer) > 0: - confidence = self._webrtc_vad.analyze_frames(buffer) - return confidence - - -class DailyDialinSettings(BaseModel): - """Settings for Daily's dial-in functionality. 
- - Parameters: - call_id: CallId is represented by UUID and represents the sessionId in the SIP Network. - call_domain: Call Domain is represented by UUID and represents your Daily Domain on the SIP Network. - """ - - call_id: str = "" - call_domain: str = "" - - -class DailyTranscriptionSettings(BaseModel): - """Configuration settings for Daily's transcription service. - - Parameters: - language: ISO language code for transcription (e.g. "en"). - model: Transcription model to use (e.g. "nova-2-general"). - profanity_filter: Whether to filter profanity from transcripts. - redact: Whether to redact sensitive information. - endpointing: Whether to use endpointing to determine speech segments. - punctuate: Whether to add punctuation to transcripts. - includeRawResponse: Whether to include raw response data. - extra: Additional parameters passed to the Deepgram transcription service. - """ - - language: str = "en" - model: str = "nova-2-general" - profanity_filter: bool = True - redact: bool = False - endpointing: bool = True - punctuate: bool = True - includeRawResponse: bool = True - extra: Mapping[str, Any] = {"interim_results": True} - - -class DailyParams(TransportParams): - """Configuration parameters for Daily transport. - - Parameters: - api_url: Daily API base URL. - api_key: Daily API authentication key. - audio_in_user_tracks: Receive users' audio in separate tracks - dialin_settings: Optional settings for dial-in functionality. - camera_out_enabled: Whether to enable the main camera output track. - microphone_out_enabled: Whether to enable the main microphone track. - transcription_enabled: Whether to enable speech transcription. - transcription_settings: Configuration for transcription service. 
- """ - - api_url: str = "https://api.daily.co/v1" - api_key: str = "" - audio_in_user_tracks: bool = True - dialin_settings: Optional[DailyDialinSettings] = None - camera_out_enabled: bool = True - microphone_out_enabled: bool = True - transcription_enabled: bool = False - transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings() - - -class DailyCallbacks(BaseModel): - """Callback handlers for Daily events. - - Parameters: - on_active_speaker_changed: Called when the active speaker of the call has changed. - on_joined: Called when bot successfully joined a room. - on_left: Called when bot left a room. - on_error: Called when an error occurs. - on_app_message: Called when receiving an app message. - on_call_state_updated: Called when call state changes. - on_client_connected: Called when a client (participant) connects. - on_client_disconnected: Called when a client (participant) disconnects. - on_dialin_connected: Called when dial-in is connected. - on_dialin_ready: Called when dial-in is ready. - on_dialin_stopped: Called when dial-in is stopped. - on_dialin_error: Called when dial-in encounters an error. - on_dialin_warning: Called when dial-in has a warning. - on_dialout_answered: Called when dial-out is answered. - on_dialout_connected: Called when dial-out is connected. - on_dialout_stopped: Called when dial-out is stopped. - on_dialout_error: Called when dial-out encounters an error. - on_dialout_warning: Called when dial-out has a warning. - on_participant_joined: Called when a participant joins. - on_participant_left: Called when a participant leaves. - on_participant_updated: Called when participant info is updated. - on_transcription_message: Called when receiving transcription. - on_transcription_stopped: Called when transcription is stopped. - on_transcription_error: Called when transcription encounters an error. - on_recording_started: Called when recording starts. - on_recording_stopped: Called when recording stops. 
- on_recording_error: Called when recording encounters an error. - """ - - on_active_speaker_changed: Callable[[Mapping[str, Any]], Awaitable[None]] - on_joined: Callable[[Mapping[str, Any]], Awaitable[None]] - on_left: Callable[[], Awaitable[None]] - on_error: Callable[[str], Awaitable[None]] - on_app_message: Callable[[Any, str], Awaitable[None]] - on_call_state_updated: Callable[[str], Awaitable[None]] - on_client_connected: Callable[[Mapping[str, Any]], Awaitable[None]] - on_client_disconnected: Callable[[Mapping[str, Any]], Awaitable[None]] - on_dialin_connected: Callable[[Any], Awaitable[None]] - on_dialin_ready: Callable[[str], Awaitable[None]] - on_dialin_stopped: Callable[[Any], Awaitable[None]] - on_dialin_error: Callable[[Any], Awaitable[None]] - on_dialin_warning: Callable[[Any], Awaitable[None]] - on_dialout_answered: Callable[[Any], Awaitable[None]] - on_dialout_connected: Callable[[Any], Awaitable[None]] - on_dialout_stopped: Callable[[Any], Awaitable[None]] - on_dialout_error: Callable[[Any], Awaitable[None]] - on_dialout_warning: Callable[[Any], Awaitable[None]] - on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] - on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] - on_participant_updated: Callable[[Mapping[str, Any]], Awaitable[None]] - on_transcription_message: Callable[[Mapping[str, Any]], Awaitable[None]] - on_transcription_stopped: Callable[[str, bool], Awaitable[None]] - on_transcription_error: Callable[[str], Awaitable[None]] - on_recording_started: Callable[[Mapping[str, Any]], Awaitable[None]] - on_recording_stopped: Callable[[str], Awaitable[None]] - on_recording_error: Callable[[str, str], Awaitable[None]] - - -def completion_callback(future): - """Create a completion callback for Daily API calls. - - Args: - future: The asyncio Future to set the result on. - - Returns: - A callback function that sets the future result. 
- """ - - def _callback(*args): - def set_result(future, *args): - try: - if len(args) > 1: - future.set_result(args) - else: - future.set_result(*args) - except asyncio.InvalidStateError: - pass - - future.get_loop().call_soon_threadsafe(set_result, future, *args) - - return _callback - - -@dataclass -class DailyAudioTrack: - """Container for Daily audio track components. - - Parameters: - source: The custom audio source for the track. - track: The custom audio track instance. - """ - - source: CustomAudioSource - track: CustomAudioTrack - - -class DailyTransportClient(EventHandler): - """Core client for interacting with Daily's API. - - Manages the connection to Daily rooms and handles all low-level API interactions - including room management, media streaming, transcription, and event handling. - """ - - _daily_initialized: bool = False - - def __new__(cls, *args, **kwargs): - """Override EventHandler's __new__ method to ensure Daily is initialized only once.""" - return super().__new__(cls) - - def __init__( - self, - room_url: str, - token: Optional[str], - bot_name: str, - params: DailyParams, - callbacks: DailyCallbacks, - transport_name: str, - ): - """Initialize the Daily transport client. - - Args: - room_url: URL of the Daily room to connect to. - token: Optional authentication token for the room. - bot_name: Display name for the bot in the call. - params: Configuration parameters for the transport. - callbacks: Event callback handlers. - transport_name: Name identifier for the transport. 
- """ - super().__init__() - - if not DailyTransportClient._daily_initialized: - DailyTransportClient._daily_initialized = True - Daily.init() - - self._room_url: str = room_url - self._token: Optional[str] = token - self._bot_name: str = bot_name - self._params: DailyParams = params - self._callbacks = callbacks - self._transport_name = transport_name - - self._participant_id: str = "" - self._audio_renderers = {} - self._video_renderers = {} - self._transcription_ids = [] - self._transcription_status = None - self._dial_out_session_id: str = "" - - self._joining = False - self._joined = False - self._joined_event = asyncio.Event() - self._leave_counter = 0 - - self._task_manager: Optional[BaseTaskManager] = None - - # We use the executor to cleanup the client. We just do it from one - # place, so only one thread is really needed. - self._executor = ThreadPoolExecutor(max_workers=1) - - self._client: CallClient = CallClient(event_handler=self) - - # We use separate tasks to execute callbacks (events, audio or - # video). In the case of events, if we call a `CallClient` function - # inside the callback and wait for its completion this will result in a - # deadlock (because we haven't exited the event callback). The deadlocks - # occur because `daily-python` is holding the GIL when calling the - # callbacks. So, if our callback handler makes a `CallClient` call and - # waits for it to finish using completions (and a future) we will - # deadlock because completions use event handlers (which are holding the - # GIL). - self._event_task = None - self._audio_task = None - self._video_task = None - - # Input and ouput sample rates. They will be initialize on setup(). 
- self._in_sample_rate = 0 - self._out_sample_rate = 0 - - self._camera: Optional[VirtualCameraDevice] = None - self._speaker: Optional[VirtualSpeakerDevice] = None - self._microphone_track: Optional[DailyAudioTrack] = None - self._custom_audio_tracks: Dict[str, DailyAudioTrack] = {} - - def _camera_name(self): - """Generate a unique virtual camera name for this client instance.""" - return f"camera-{self}" - - def _speaker_name(self): - """Generate a unique virtual speaker name for this client instance.""" - return f"speaker-{self}" - - @property - def room_url(self) -> str: - """Get the Daily room URL. - - Returns: - The room URL this client is connected to. - """ - return self._room_url - - @property - def participant_id(self) -> str: - """Get the participant ID for this client. - - Returns: - The participant ID assigned by Daily. - """ - return self._participant_id - - @property - def in_sample_rate(self) -> int: - """Get the input audio sample rate. - - Returns: - The input sample rate in Hz. - """ - return self._in_sample_rate - - @property - def out_sample_rate(self) -> int: - """Get the output audio sample rate. - - Returns: - The output sample rate in Hz. - """ - return self._out_sample_rate - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send an application message to participants. - - Args: - frame: The message frame to send. 
- """ - if not self._joined: - return - - participant_id = None - if isinstance(frame, (DailyTransportMessageFrame, DailyTransportMessageUrgentFrame)): - participant_id = frame.participant_id - - future = self._get_event_loop().create_future() - self._client.send_app_message( - frame.message, participant_id, completion=completion_callback(future) - ) - await future - - async def read_next_audio_frame(self) -> Optional[InputAudioRawFrame]: - """Reads the next 20ms audio frame from the virtual speaker.""" - if not self._speaker: - return None - - sample_rate = self._in_sample_rate - num_channels = self._params.audio_in_channels - num_frames = int(sample_rate / 100) * 2 # 20ms of audio - - future = self._get_event_loop().create_future() - self._speaker.read_frames(num_frames, completion=completion_callback(future)) - audio = await future - - if len(audio) > 0: - return InputAudioRawFrame( - audio=audio, sample_rate=sample_rate, num_channels=num_channels - ) - else: - # If we don't read any audio it could be there's no participant - # connected. daily-python will return immediately if that's the - # case, so let's sleep for a little bit (i.e. busy wait). - await asyncio.sleep(0.01) - return None - - async def register_audio_destination(self, destination: str): - """Register a custom audio destination for multi-track output. - - Args: - destination: The destination identifier to register. - """ - self._custom_audio_tracks[destination] = await self.add_custom_audio_track(destination) - self._client.update_publishing({"customAudio": {destination: True}}) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the appropriate audio track. - - Args: - frame: The audio frame to write. 
- """ - future = self._get_event_loop().create_future() - - destination = frame.transport_destination - audio_source: Optional[CustomAudioSource] = None - if not destination and self._microphone_track: - audio_source = self._microphone_track.source - elif destination and destination in self._custom_audio_tracks: - track = self._custom_audio_tracks[destination] - audio_source = track.source - - if audio_source: - audio_source.write_frames(frame.audio, completion=completion_callback(future)) - else: - logger.warning(f"{self} unable to write audio frames to destination [{destination}]") - future.set_result(None) - - await future - - async def write_video_frame(self, frame: OutputImageRawFrame): - """Write a video frame to the camera device. - - Args: - frame: The image frame to write. - """ - if not frame.transport_destination and self._camera: - self._camera.write_frame(frame.image) - - async def setup(self, setup: FrameProcessorSetup): - """Setup the client with task manager and event queues. - - Args: - setup: The frame processor setup configuration. - """ - if self._task_manager: - return - - self._task_manager = setup.task_manager - - self._event_queue = asyncio.Queue() - self._event_task = self._task_manager.create_task( - self._callback_task_handler(self._event_queue), - f"{self}::event_callback_task", - ) - - async def cleanup(self): - """Cleanup client resources and cancel tasks.""" - if self._event_task and self._task_manager: - await self._task_manager.cancel_task(self._event_task) - self._event_task = None - if self._audio_task and self._task_manager: - await self._task_manager.cancel_task(self._audio_task) - self._audio_task = None - if self._video_task and self._task_manager: - await self._task_manager.cancel_task(self._video_task) - self._video_task = None - # Make sure we don't block the event loop in case `client.release()` - # takes extra time. 
- await self._get_event_loop().run_in_executor(self._executor, self._cleanup) - - async def start(self, frame: StartFrame): - """Start the client and initialize audio/video components. - - Args: - frame: The start frame containing initialization parameters. - """ - self._in_sample_rate = self._params.audio_in_sample_rate or frame.audio_in_sample_rate - self._out_sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate - - if self._params.audio_in_enabled: - if self._params.audio_in_user_tracks and not self._audio_task and self._task_manager: - self._audio_queue = asyncio.Queue() - self._audio_task = self._task_manager.create_task( - self._callback_task_handler(self._audio_queue), - f"{self}::audio_callback_task", - ) - elif not self._speaker: - self._speaker = Daily.create_speaker_device( - self._speaker_name(), - sample_rate=self._in_sample_rate, - channels=self._params.audio_in_channels, - non_blocking=True, - ) - Daily.select_speaker_device(self._speaker_name()) - - if self._params.video_in_enabled and not self._video_task and self._task_manager: - self._video_queue = asyncio.Queue() - self._video_task = self._task_manager.create_task( - self._callback_task_handler(self._video_queue), - f"{self}::video_callback_task", - ) - if self._params.video_out_enabled and not self._camera: - self._camera = Daily.create_camera_device( - self._camera_name(), - width=self._params.video_out_width, - height=self._params.video_out_height, - color_format=self._params.video_out_color_format, - ) - - if self._params.audio_out_enabled and not self._microphone_track: - audio_source = CustomAudioSource(self._out_sample_rate, self._params.audio_out_channels) - audio_track = CustomAudioTrack(audio_source) - self._microphone_track = DailyAudioTrack(source=audio_source, track=audio_track) - - async def join(self): - """Join the Daily room with configured settings.""" - # Transport already joined or joining, ignore. 
- if self._joined or self._joining: - # Increment leave counter if we already joined. - self._leave_counter += 1 - return - - logger.info(f"Joining {self._room_url}") - self._joining = True - - # For performance reasons, never subscribe to video streams (unless a - # video renderer is registered). - self._client.update_subscription_profiles( - {"base": {"camera": "unsubscribed", "screenVideo": "unsubscribed"}} - ) - - self._client.set_user_name(self._bot_name) - - try: - (data, error) = await self._join() - - if not error: - self._joined = True - self._joining = False - # Increment leave counter if we successfully joined. - self._leave_counter += 1 - - logger.info(f"Joined {self._room_url}") - - if self._params.transcription_enabled: - await self.start_transcription(self._params.transcription_settings) - - await self._callbacks.on_joined(data) - - self._joined_event.set() - else: - error_msg = f"Error joining {self._room_url}: {error}" - logger.error(error_msg) - await self._callbacks.on_error(error_msg) - except asyncio.TimeoutError: - error_msg = f"Time out joining {self._room_url}" - logger.error(error_msg) - self._joining = False - await self._callbacks.on_error(error_msg) - - async def _join(self): - """Execute the actual room join operation.""" - if not self._client: - return - - future = self._get_event_loop().create_future() - - camera_enabled = self._params.video_out_enabled and self._params.camera_out_enabled - microphone_enabled = self._params.audio_out_enabled and self._params.microphone_out_enabled - - self._client.join( - self._room_url, - self._token, - completion=completion_callback(future), - client_settings={ - "inputs": { - "camera": { - "isEnabled": camera_enabled, - "settings": { - "deviceId": self._camera_name(), - }, - }, - "microphone": { - "isEnabled": microphone_enabled, - "settings": { - "customTrack": { - "id": self._microphone_track.track.id - if self._microphone_track - else "no-microphone-track" - } - }, - }, - }, - "publishing": { - 
"camera": { - "sendSettings": { - "maxQuality": "low", - "encodings": { - "low": { - "maxBitrate": self._params.video_out_bitrate, - "maxFramerate": self._params.video_out_framerate, - } - }, - } - }, - "microphone": { - "sendSettings": { - "channelConfig": "stereo" - if self._params.audio_out_channels == 2 - else "mono", - "bitrate": self._params.audio_out_bitrate, - } - }, - }, - }, - ) - - return await asyncio.wait_for(future, timeout=10) - - async def leave(self): - """Leave the Daily room and cleanup resources.""" - # Decrement leave counter when leaving. - self._leave_counter -= 1 - - # Transport not joined, ignore. - if not self._joined or self._leave_counter > 0: - return - - self._joined = False - self._joined_event.clear() - - logger.info(f"Leaving {self._room_url}") - - if self._params.transcription_enabled: - await self.stop_transcription() - - # Remove any custom tracks, if any. - for track_name, _ in self._custom_audio_tracks.items(): - await self.remove_custom_audio_track(track_name) - - try: - error = await self._leave() - if not error: - logger.info(f"Left {self._room_url}") - await self._callbacks.on_left() - else: - error_msg = f"Error leaving {self._room_url}: {error}" - logger.error(error_msg) - await self._callbacks.on_error(error_msg) - except asyncio.TimeoutError: - error_msg = f"Time out leaving {self._room_url}" - logger.error(error_msg) - await self._callbacks.on_error(error_msg) - - async def _leave(self): - """Execute the actual room leave operation.""" - if not self._client: - return - - future = self._get_event_loop().create_future() - self._client.leave(completion=completion_callback(future)) - return await asyncio.wait_for(future, timeout=10) - - def _cleanup(self): - """Cleanup the Daily client instance.""" - if self._client: - self._client.release() - self._client = None - - def participants(self): - """Get current participants in the room. - - Returns: - Dictionary of participants keyed by participant ID. 
- """ - return self._client.participants() - - def participant_counts(self): - """Get participant count information. - - Returns: - Dictionary with participant count details. - """ - return self._client.participant_counts() - - async def start_dialout(self, settings): - """Start a dial-out call to a phone number. - - Args: - settings: Dial-out configuration settings. - """ - logger.debug(f"Starting dialout: settings={settings}") - - future = self._get_event_loop().create_future() - self._client.start_dialout(settings, completion=completion_callback(future)) - error = await future - if error: - logger.error(f"Unable to start dialout: {error}") - - async def stop_dialout(self, participant_id): - """Stop a dial-out call for a specific participant. - - Args: - participant_id: ID of the participant to stop dial-out for. - """ - logger.debug(f"Stopping dialout: participant_id={participant_id}") - - future = self._get_event_loop().create_future() - self._client.stop_dialout(participant_id, completion=completion_callback(future)) - error = await future - if error: - logger.error(f"Unable to stop dialout: {error}") - - async def send_dtmf(self, settings): - """Send DTMF tones during a call. - - Args: - settings: DTMF settings including tones and target session. - """ - session_id = settings.get("sessionId") or self._dial_out_session_id - if not session_id: - logger.error("Unable to send DTMF: 'sessionId' is not set") - return - - # Update 'sessionId' field. - settings["sessionId"] = session_id - - future = self._get_event_loop().create_future() - self._client.send_dtmf(settings, completion=completion_callback(future)) - await future - - async def sip_call_transfer(self, settings): - """Transfer a SIP call to another destination. - - Args: - settings: SIP call transfer settings. 
- """ - future = self._get_event_loop().create_future() - self._client.sip_call_transfer(settings, completion=completion_callback(future)) - await future - - async def sip_refer(self, settings): - """Send a SIP REFER request. - - Args: - settings: SIP REFER settings. - """ - future = self._get_event_loop().create_future() - self._client.sip_refer(settings, completion=completion_callback(future)) - await future - - async def start_recording(self, streaming_settings, stream_id, force_new): - """Start recording the call. - - Args: - streaming_settings: Recording configuration settings. - stream_id: Unique identifier for the recording stream. - force_new: Whether to force a new recording session. - """ - logger.debug( - f"Starting recording: stream_id={stream_id} force_new={force_new} settings={streaming_settings}" - ) - - future = self._get_event_loop().create_future() - self._client.start_recording( - streaming_settings, stream_id, force_new, completion=completion_callback(future) - ) - error = await future - if error: - logger.error(f"Unable to start recording: {error}") - - async def stop_recording(self, stream_id): - """Stop recording the call. - - Args: - stream_id: Unique identifier for the recording stream to stop. - """ - logger.debug(f"Stopping recording: stream_id={stream_id}") - - future = self._get_event_loop().create_future() - self._client.stop_recording(stream_id, completion=completion_callback(future)) - error = await future - if error: - logger.error(f"Unable to stop recording: {error}") - - async def start_transcription(self, settings): - """Start transcription for the call. - - Args: - settings: Transcription configuration settings. 
- """ - if not self._token: - logger.warning("Transcription can't be started without a room token") - return - - logger.debug(f"Starting transcription: settings={settings}") - - future = self._get_event_loop().create_future() - self._client.start_transcription( - settings=self._params.transcription_settings.model_dump(exclude_none=True), - completion=completion_callback(future), - ) - error = await future - if error: - logger.error(f"Unable to start transcription: {error}") - - async def stop_transcription(self): - """Stop transcription for the call.""" - if not self._token: - return - - logger.debug(f"Stopping transcription") - - future = self._get_event_loop().create_future() - self._client.stop_transcription(completion=completion_callback(future)) - error = await future - if error: - logger.error(f"Unable to stop transcription: {error}") - - async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None): - """Send a chat message to Daily's Prebuilt main room. - - Args: - message: The chat message to send. - user_name: Optional user name that will appear as sender of the message. - """ - if not self._joined: - return - - future = self._get_event_loop().create_future() - self._client.send_prebuilt_chat_message( - message, user_name=user_name, completion=completion_callback(future) - ) - await future - - async def capture_participant_transcription(self, participant_id: str): - """Enable transcription capture for a specific participant. - - Args: - participant_id: ID of the participant to capture transcription for. 
- """ - if not self._params.transcription_enabled: - return - - self._transcription_ids.append(participant_id) - if self._joined and self._transcription_status: - await self.update_transcription(self._transcription_ids) - - async def capture_participant_audio( - self, - participant_id: str, - callback: Callable, - audio_source: str = "microphone", - sample_rate: int = 16000, - callback_interval_ms: int = 20, - ): - """Capture audio from a specific participant. - - Args: - participant_id: ID of the participant to capture audio from. - callback: Callback function to handle audio data. - audio_source: Audio source to capture (microphone, screenAudio, or custom). - sample_rate: Desired sample rate for audio capture. - callback_interval_ms: Interval between audio callbacks in milliseconds. - """ - # Only enable the desired audio source subscription on this participant. - if audio_source in ("microphone", "screenAudio"): - media = {"media": {audio_source: "subscribed"}} - else: - media = {"media": {"customAudio": {audio_source: "subscribed"}}} - - await self.update_subscriptions(participant_settings={participant_id: media}) - - self._audio_renderers.setdefault(participant_id, {})[audio_source] = callback - - logger.debug( - f"Starting to capture [{audio_source}] audio from participant {participant_id}" - ) - - self._client.set_audio_renderer( - participant_id, - self._audio_data_received, - audio_source=audio_source, - sample_rate=sample_rate, - callback_interval_ms=callback_interval_ms, - ) - - async def capture_participant_video( - self, - participant_id: str, - callback: Callable, - framerate: int = 30, - video_source: str = "camera", - color_format: str = "RGB", - ): - """Capture video from a specific participant. - - Args: - participant_id: ID of the participant to capture video from. - callback: Callback function to handle video frames. - framerate: Desired framerate for video capture. - video_source: Video source to capture (camera, screenVideo, or custom). 
- color_format: Color format for video frames. - """ - # Only enable the desired audio source subscription on this participant. - if video_source in ("camera", "screenVideo"): - media = {"media": {video_source: "subscribed"}} - else: - media = {"media": {"customVideo": {video_source: "subscribed"}}} - - await self.update_subscriptions(participant_settings={participant_id: media}) - - self._video_renderers.setdefault(participant_id, {})[video_source] = callback - - logger.debug( - f"Starting to capture [{video_source}] video from participant {participant_id}" - ) - - self._client.set_video_renderer( - participant_id, - self._video_frame_received, - video_source=video_source, - color_format=color_format, - ) - - async def add_custom_audio_track(self, track_name: str) -> DailyAudioTrack: - """Add a custom audio track for multi-stream output. - - Args: - track_name: Name for the custom audio track. - - Returns: - The created DailyAudioTrack instance. - """ - future = self._get_event_loop().create_future() - - audio_source = CustomAudioSource(self._out_sample_rate, 1) - - audio_track = CustomAudioTrack(audio_source) - - self._client.add_custom_audio_track( - track_name=track_name, - audio_track=audio_track, - ignore_audio_level=True, - completion=completion_callback(future), - ) - - await future - - track = DailyAudioTrack(source=audio_source, track=audio_track) - - return track - - async def remove_custom_audio_track(self, track_name: str): - """Remove a custom audio track. - - Args: - track_name: Name of the custom audio track to remove. - """ - future = self._get_event_loop().create_future() - self._client.remove_custom_audio_track( - track_name=track_name, - completion=completion_callback(future), - ) - await future - - async def update_transcription(self, participants=None, instance_id=None): - """Update transcription settings for specific participants. - - Args: - participants: List of participant IDs to enable transcription for. 
- instance_id: Optional transcription instance ID. - """ - future = self._get_event_loop().create_future() - self._client.update_transcription( - participants, instance_id, completion=completion_callback(future) - ) - await future - - async def update_subscriptions(self, participant_settings=None, profile_settings=None): - """Update media subscription settings. - - Args: - participant_settings: Per-participant subscription settings. - profile_settings: Global subscription profile settings. - """ - future = self._get_event_loop().create_future() - self._client.update_subscriptions( - participant_settings=participant_settings, - profile_settings=profile_settings, - completion=completion_callback(future), - ) - await future - - async def update_publishing(self, publishing_settings: Mapping[str, Any]): - """Update media publishing settings. - - Args: - publishing_settings: Publishing configuration settings. - """ - future = self._get_event_loop().create_future() - self._client.update_publishing( - publishing_settings=publishing_settings, - completion=completion_callback(future), - ) - await future - - async def update_remote_participants(self, remote_participants: Mapping[str, Any]): - """Update settings for remote participants. - - Args: - remote_participants: Remote participant configuration settings. - """ - future = self._get_event_loop().create_future() - self._client.update_remote_participants( - remote_participants=remote_participants, completion=completion_callback(future) - ) - await future - - # - # - # Daily (EventHandler) - # - - def on_active_speaker_changed(self, participant): - """Handle active speaker change events. - - Args: - participant: The new active speaker participant info. - """ - self._call_event_callback(self._callbacks.on_active_speaker_changed, participant) - - def on_app_message(self, message: Any, sender: str): - """Handle application message events. - - Args: - message: The received message data. - sender: ID of the message sender. 
- """ - self._call_event_callback(self._callbacks.on_app_message, message, sender) - - def on_call_state_updated(self, state: str): - """Handle call state update events. - - Args: - state: The new call state. - """ - self._call_event_callback(self._callbacks.on_call_state_updated, state) - - def on_dialin_connected(self, data: Any): - """Handle dial-in connected events. - - Args: - data: Dial-in connection data. - """ - self._call_event_callback(self._callbacks.on_dialin_connected, data) - - def on_dialin_ready(self, sip_endpoint: str): - """Handle dial-in ready events. - - Args: - sip_endpoint: The SIP endpoint for dial-in. - """ - self._call_event_callback(self._callbacks.on_dialin_ready, sip_endpoint) - - def on_dialin_stopped(self, data: Any): - """Handle dial-in stopped events. - - Args: - data: Dial-in stop data. - """ - self._call_event_callback(self._callbacks.on_dialin_stopped, data) - - def on_dialin_error(self, data: Any): - """Handle dial-in error events. - - Args: - data: Dial-in error data. - """ - self._call_event_callback(self._callbacks.on_dialin_error, data) - - def on_dialin_warning(self, data: Any): - """Handle dial-in warning events. - - Args: - data: Dial-in warning data. - """ - self._call_event_callback(self._callbacks.on_dialin_warning, data) - - def on_dialout_answered(self, data: Any): - """Handle dial-out answered events. - - Args: - data: Dial-out answered data. - """ - self._call_event_callback(self._callbacks.on_dialout_answered, data) - - def on_dialout_connected(self, data: Any): - """Handle dial-out connected events. - - Args: - data: Dial-out connection data. - """ - self._dial_out_session_id = data["sessionId"] if "sessionId" in data else "" - self._call_event_callback(self._callbacks.on_dialout_connected, data) - - def on_dialout_stopped(self, data: Any): - """Handle dial-out stopped events. - - Args: - data: Dial-out stop data. - """ - # Cleanup only if our session stopped. 
- if data["sessionId"] == self._dial_out_session_id: - self._dial_out_session_id = "" - self._call_event_callback(self._callbacks.on_dialout_stopped, data) - - def on_dialout_error(self, data: Any): - """Handle dial-out error events. - - Args: - data: Dial-out error data. - """ - # Cleanup only if our session errored out. - if data["sessionId"] == self._dial_out_session_id: - self._dial_out_session_id = "" - self._call_event_callback(self._callbacks.on_dialout_error, data) - - def on_dialout_warning(self, data: Any): - """Handle dial-out warning events. - - Args: - data: Dial-out warning data. - """ - self._call_event_callback(self._callbacks.on_dialout_warning, data) - - def on_participant_joined(self, participant): - """Handle participant joined events. - - Args: - participant: The participant that joined. - """ - self._call_event_callback(self._callbacks.on_participant_joined, participant) - - def on_participant_left(self, participant, reason): - """Handle participant left events. - - Args: - participant: The participant that left. - reason: Reason for leaving. - """ - self._call_event_callback(self._callbacks.on_participant_left, participant, reason) - - def on_participant_updated(self, participant): - """Handle participant updated events. - - Args: - participant: The updated participant info. - """ - self._call_event_callback(self._callbacks.on_participant_updated, participant) - - def on_transcription_started(self, status): - """Handle transcription started events. - - Args: - status: Transcription start status. - """ - logger.debug(f"Transcription started: {status}") - self._transcription_status = status - self._call_event_callback(self.update_transcription, self._transcription_ids) - - def on_transcription_stopped(self, stopped_by, stopped_by_error): - """Handle transcription stopped events. - - Args: - stopped_by: Who stopped the transcription. - stopped_by_error: Whether stopped due to error. 
- """ - logger.debug("Transcription stopped") - self._call_event_callback( - self._callbacks.on_transcription_stopped, stopped_by, stopped_by_error - ) - - def on_transcription_error(self, message): - """Handle transcription error events. - - Args: - message: Error message. - """ - logger.error(f"Transcription error: {message}") - self._call_event_callback(self._callbacks.on_transcription_error, message) - - def on_transcription_message(self, message): - """Handle transcription message events. - - Args: - message: The transcription message data. - """ - self._call_event_callback(self._callbacks.on_transcription_message, message) - - def on_recording_started(self, status): - """Handle recording started events. - - Args: - status: Recording start status. - """ - logger.debug(f"Recording started: {status}") - self._call_event_callback(self._callbacks.on_recording_started, status) - - def on_recording_stopped(self, stream_id): - """Handle recording stopped events. - - Args: - stream_id: ID of the stopped recording stream. - """ - logger.debug(f"Recording stopped: {stream_id}") - self._call_event_callback(self._callbacks.on_recording_stopped, stream_id) - - def on_recording_error(self, stream_id, message): - """Handle recording error events. - - Args: - stream_id: ID of the recording stream with error. - message: Error message. 
- """ - logger.error(f"Recording error for {stream_id}: {message}") - self._call_event_callback(self._callbacks.on_recording_error, stream_id, message) - - # - # Daily (CallClient callbacks) - # - - def _audio_data_received(self, participant_id: str, audio_data: AudioData, audio_source: str): - """Handle received audio data from participants.""" - callback = self._audio_renderers[participant_id][audio_source] - self._call_audio_callback(callback, participant_id, audio_data, audio_source) - - def _video_frame_received( - self, participant_id: str, video_frame: VideoFrame, video_source: str - ): - """Handle received video frames from participants.""" - callback = self._video_renderers[participant_id][video_source] - self._call_video_callback(callback, participant_id, video_frame, video_source) - - # - # Queue callbacks handling - # - - def _call_audio_callback(self, callback, *args): - """Queue an audio callback for async execution.""" - self._call_async_callback(self._audio_queue, callback, *args) - - def _call_video_callback(self, callback, *args): - """Queue a video callback for async execution.""" - self._call_async_callback(self._video_queue, callback, *args) - - def _call_event_callback(self, callback, *args): - """Queue an event callback for async execution.""" - self._call_async_callback(self._event_queue, callback, *args) - - def _call_async_callback(self, queue: asyncio.Queue, callback, *args): - """Queue a callback for async execution on the event loop.""" - try: - future = asyncio.run_coroutine_threadsafe( - queue.put((callback, *args)), self._get_event_loop() - ) - future.result() - except FuturesCancelledError: - pass - - async def _callback_task_handler(self, queue: asyncio.Queue): - """Handle queued callbacks from the specified queue.""" - while True: - # Wait to process any callback until we are joined. 
- await self._joined_event.wait() - (callback, *args) = await queue.get() - await callback(*args) - queue.task_done() - - def _get_event_loop(self) -> asyncio.AbstractEventLoop: - """Get the event loop from the task manager.""" - if not self._task_manager: - raise Exception(f"{self}: missing task manager (pipeline not started?)") - return self._task_manager.get_event_loop() - - def __str__(self): - """String representation of the DailyTransportClient.""" - return f"{self._transport_name}::DailyTransportClient" - - -class DailyInputTransport(BaseInputTransport): - """Handles incoming media streams and events from Daily calls. - - Processes incoming audio, video, transcriptions and other events from Daily - room participants, including participant media capture and event forwarding. - """ - - def __init__( - self, - transport: BaseTransport, - client: DailyTransportClient, - params: DailyParams, - **kwargs, - ): - """Initialize the Daily input transport. - - Args: - transport: The parent transport instance. - client: DailyTransportClient instance. - params: Configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - - self._transport = transport - self._client = client - self._params = params - - self._video_renderers = {} - - # Whether we have seen a StartFrame already. - self._initialized = False - - # Whether we have started audio streaming. - self._streaming_started = False - - # Store the list of participants we should stream. This is necessary in - # case we don't start streaming right away. - self._capture_participant_audio = [] - - # Audio task when using a virtual speaker (i.e. no user tracks). - self._audio_in_task: Optional[asyncio.Task] = None - - self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer - - @property - def vad_analyzer(self) -> Optional[VADAnalyzer]: - """Get the Voice Activity Detection analyzer. - - Returns: - The VAD analyzer instance if configured. 
- """ - return self._vad_analyzer - - async def start_audio_in_streaming(self): - """Start receiving audio from participants.""" - if not self._params.audio_in_enabled: - return - - logger.debug(f"Start receiving audio") - - if self._params.audio_in_enabled: - if self._params.audio_in_user_tracks: - # Capture invididual participant tracks. - for participant_id, audio_source, sample_rate in self._capture_participant_audio: - await self._client.capture_participant_audio( - participant_id, self._on_participant_audio_data, audio_source, sample_rate - ) - elif not self._audio_in_task: - # Create audio task. It reads audio frames from a single room - # track and pushes them internally for VAD processing. - self._audio_in_task = self.create_task(self._audio_in_task_handler()) - - self._streaming_started = True - - async def setup(self, setup: FrameProcessorSetup): - """Setup the input transport with shared client setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup input transport and shared resources.""" - await super().cleanup() - await self._client.cleanup() - await self._transport.cleanup() - - async def start(self, frame: StartFrame): - """Start the input transport and join the Daily room. - - Args: - frame: The start frame containing initialization parameters. - """ - # Parent start. - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - # Setup client. - await self._client.start(frame) - - # Join the room. - await self._client.join() - - # Indicate the transport that we are connected. - await self.set_transport_ready(frame) - - if self._params.audio_in_stream_on_start: - await self.start_audio_in_streaming() - - async def stop(self, frame: EndFrame): - """Stop the input transport and leave the Daily room. - - Args: - frame: The end frame signaling transport shutdown. - """ - # Parent stop. 
- await super().stop(frame) - # Leave the room. - await self._client.leave() - # Stop audio thread. - if self._audio_in_task: - await self.cancel_task(self._audio_in_task) - self._audio_in_task = None - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport and leave the Daily room. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - # Parent stop. - await super().cancel(frame) - # Leave the room. - await self._client.leave() - # Stop audio thread. - if self._audio_in_task: - await self.cancel_task(self._audio_in_task) - self._audio_in_task = None - - # - # FrameProcessor - # - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process incoming frames, including user image requests. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, UserImageRequestFrame): - await self.request_participant_image(frame) - - # - # Frames - # - - async def push_transcription_frame(self, frame: TranscriptionFrame | InterimTranscriptionFrame): - """Push a transcription frame downstream. - - Args: - frame: The transcription frame to push. - """ - await self.push_frame(frame) - - async def push_app_message(self, message: Any, sender: str): - """Push an application message as an urgent transport frame. - - Args: - message: The message data to send. - sender: ID of the message sender. - """ - frame = DailyTransportMessageUrgentFrame(message=message, participant_id=sender) - await self.push_frame(frame) - - # - # Audio in - # - - async def capture_participant_audio( - self, - participant_id: str, - audio_source: str = "microphone", - sample_rate: int = 16000, - ): - """Capture audio from a specific participant. - - Args: - participant_id: ID of the participant to capture audio from. - audio_source: Audio source to capture from. - sample_rate: Desired sample rate for audio capture. 
- """ - if self._streaming_started: - await self._client.capture_participant_audio( - participant_id, self._on_participant_audio_data, audio_source, sample_rate - ) - else: - self._capture_participant_audio.append((participant_id, audio_source, sample_rate)) - - async def _on_participant_audio_data( - self, participant_id: str, audio: AudioData, audio_source: str - ): - """Handle received participant audio data.""" - frame = UserAudioRawFrame( - user_id=participant_id, - audio=audio.audio_frames, - sample_rate=audio.sample_rate, - num_channels=audio.num_channels, - ) - frame.transport_source = audio_source - await self.push_audio_frame(frame) - - async def _audio_in_task_handler(self): - while True: - frame = await self._client.read_next_audio_frame() - if frame: - await self.push_audio_frame(frame) - - # - # Camera in - # - - async def capture_participant_video( - self, - participant_id: str, - framerate: int = 30, - video_source: str = "camera", - color_format: str = "RGB", - ): - """Capture video from a specific participant. - - Args: - participant_id: ID of the participant to capture video from. - framerate: Desired framerate for video capture. - video_source: Video source to capture from. - color_format: Color format for video frames. - """ - if participant_id not in self._video_renderers: - self._video_renderers[participant_id] = {} - - self._video_renderers[participant_id][video_source] = { - "framerate": framerate, - "timestamp": 0, - "render_next_frame": [], - } - - await self._client.capture_participant_video( - participant_id, self._on_participant_video_frame, framerate, video_source, color_format - ) - - async def request_participant_image(self, frame: UserImageRequestFrame): - """Request a video frame from a specific participant. - - Args: - frame: The user image request frame. 
- """ - if frame.user_id in self._video_renderers: - video_source = frame.video_source if frame.video_source else "camera" - self._video_renderers[frame.user_id][video_source]["render_next_frame"].append(frame) - - async def _on_participant_video_frame( - self, participant_id: str, video_frame: VideoFrame, video_source: str - ): - """Handle received participant video frames.""" - render_frame = False - - curr_time = time.time() - prev_time = self._video_renderers[participant_id][video_source]["timestamp"] - framerate = self._video_renderers[participant_id][video_source]["framerate"] - - # Some times we render frames because of a request. - request_frame = None - - if framerate > 0: - next_time = prev_time + 1 / framerate - render_frame = (next_time - curr_time) < 0.1 - - if self._video_renderers[participant_id][video_source]["render_next_frame"]: - request_frame = self._video_renderers[participant_id][video_source][ - "render_next_frame" - ].pop(0) - render_frame = True - - if render_frame: - frame = UserImageRawFrame( - user_id=participant_id, - request=request_frame, - image=video_frame.buffer, - size=(video_frame.width, video_frame.height), - format=video_frame.color_format, - ) - frame.transport_source = video_source - await self.push_video_frame(frame) - self._video_renderers[participant_id][video_source]["timestamp"] = curr_time - - -class DailyOutputTransport(BaseOutputTransport): - """Handles outgoing media streams and events to Daily calls. - - Manages sending audio, video and other data to Daily calls, - including audio destination registration and message transmission. - """ - - def __init__( - self, transport: BaseTransport, client: DailyTransportClient, params: DailyParams, **kwargs - ): - """Initialize the Daily output transport. - - Args: - transport: The parent transport instance. - client: DailyTransportClient instance. - params: Configuration parameters. - **kwargs: Additional arguments passed to parent class. 
- """ - super().__init__(params, **kwargs) - - self._transport = transport - self._client = client - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def setup(self, setup: FrameProcessorSetup): - """Setup the output transport with shared client setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup output transport and shared resources.""" - await super().cleanup() - await self._client.cleanup() - await self._transport.cleanup() - - async def start(self, frame: StartFrame): - """Start the output transport and join the Daily room. - - Args: - frame: The start frame containing initialization parameters. - """ - # Parent start. - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - # Setup client. - await self._client.start(frame) - - # Join the room. - await self._client.join() - - # Indicate the transport that we are connected. - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport and leave the Daily room. - - Args: - frame: The end frame signaling transport shutdown. - """ - # Parent stop. - await super().stop(frame) - # Leave the room. - await self._client.leave() - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and leave the Daily room. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - # Parent stop. - await super().cancel(frame) - # Leave the room. - await self._client.leave() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message to participants. - - Args: - frame: The transport message frame to send. - """ - await self._client.send_message(frame) - - async def register_video_destination(self, destination: str): - """Register a video output destination. 
- - Args: - destination: The destination identifier to register. - """ - logger.warning(f"{self} registering video destinations is not supported yet") - - async def register_audio_destination(self, destination: str): - """Register an audio output destination. - - Args: - destination: The destination identifier to register. - """ - await self._client.register_audio_destination(destination) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the Daily call. - - Args: - frame: The audio frame to write. - """ - await self._client.write_audio_frame(frame) - - async def write_video_frame(self, frame: OutputImageRawFrame): - """Write a video frame to the Daily call. - - Args: - frame: The video frame to write. - """ - await self._client.write_video_frame(frame) - - -class DailyTransport(BaseTransport): - """Transport implementation for Daily audio and video calls. - - Provides comprehensive Daily integration including audio/video streaming, - transcription, recording, dial-in/out functionality, and real-time communication - features for conversational AI applications. - """ - - def __init__( - self, - room_url: str, - token: Optional[str], - bot_name: str, - params: Optional[DailyParams] = None, - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the Daily transport. - - Args: - room_url: URL of the Daily room to connect to. - token: Optional authentication token for the room. - bot_name: Display name for the bot in the call. - params: Configuration parameters for the transport. - input_name: Optional name for the input transport. - output_name: Optional name for the output transport. 
- """ - super().__init__(input_name=input_name, output_name=output_name) - - callbacks = DailyCallbacks( - on_active_speaker_changed=self._on_active_speaker_changed, - on_joined=self._on_joined, - on_left=self._on_left, - on_error=self._on_error, - on_app_message=self._on_app_message, - on_call_state_updated=self._on_call_state_updated, - on_client_connected=self._on_client_connected, - on_client_disconnected=self._on_client_disconnected, - on_dialin_connected=self._on_dialin_connected, - on_dialin_ready=self._on_dialin_ready, - on_dialin_stopped=self._on_dialin_stopped, - on_dialin_error=self._on_dialin_error, - on_dialin_warning=self._on_dialin_warning, - on_dialout_answered=self._on_dialout_answered, - on_dialout_connected=self._on_dialout_connected, - on_dialout_stopped=self._on_dialout_stopped, - on_dialout_error=self._on_dialout_error, - on_dialout_warning=self._on_dialout_warning, - on_participant_joined=self._on_participant_joined, - on_participant_left=self._on_participant_left, - on_participant_updated=self._on_participant_updated, - on_transcription_message=self._on_transcription_message, - on_transcription_stopped=self._on_transcription_stopped, - on_transcription_error=self._on_transcription_error, - on_recording_started=self._on_recording_started, - on_recording_stopped=self._on_recording_stopped, - on_recording_error=self._on_recording_error, - ) - self._params = params or DailyParams() - - self._client = DailyTransportClient( - room_url, token, bot_name, self._params, callbacks, self.name - ) - self._input: Optional[DailyInputTransport] = None - self._output: Optional[DailyOutputTransport] = None - - self._other_participant_has_joined = False - - # Register supported handlers. The user will only be able to register - # these handlers. 
- self._register_event_handler("on_active_speaker_changed") - self._register_event_handler("on_joined") - self._register_event_handler("on_left") - self._register_event_handler("on_error") - self._register_event_handler("on_app_message") - self._register_event_handler("on_call_state_updated") - self._register_event_handler("on_client_connected") - self._register_event_handler("on_client_disconnected") - self._register_event_handler("on_dialin_connected") - self._register_event_handler("on_dialin_ready") - self._register_event_handler("on_dialin_stopped") - self._register_event_handler("on_dialin_error") - self._register_event_handler("on_dialin_warning") - self._register_event_handler("on_dialout_answered") - self._register_event_handler("on_dialout_connected") - self._register_event_handler("on_dialout_stopped") - self._register_event_handler("on_dialout_error") - self._register_event_handler("on_dialout_warning") - self._register_event_handler("on_first_participant_joined") - self._register_event_handler("on_participant_joined") - self._register_event_handler("on_participant_left") - self._register_event_handler("on_participant_updated") - self._register_event_handler("on_transcription_message") - self._register_event_handler("on_recording_started") - self._register_event_handler("on_recording_stopped") - self._register_event_handler("on_recording_error") - - # - # BaseTransport - # - - def input(self) -> DailyInputTransport: - """Get the input transport for receiving media and events. - - Returns: - The Daily input transport instance. - """ - if not self._input: - self._input = DailyInputTransport( - self, self._client, self._params, name=self._input_name - ) - return self._input - - def output(self) -> DailyOutputTransport: - """Get the output transport for sending media and events. - - Returns: - The Daily output transport instance. 
- """ - if not self._output: - self._output = DailyOutputTransport( - self, self._client, self._params, name=self._output_name - ) - return self._output - - # - # DailyTransport - # - - @property - def room_url(self) -> str: - """Get the Daily room URL. - - Returns: - The room URL this transport is connected to. - """ - return self._client.room_url - - @property - def participant_id(self) -> str: - """Get the participant ID for this transport. - - Returns: - The participant ID assigned by Daily. - """ - return self._client.participant_id - - def set_log_level(self, level: DailyLogLevel): - """Set the logging level for Daily's internal logging system. - - Args: - level: The log level to set. Should be a member of the DailyLogLevel enum, - such as DailyLogLevel.Info, DailyLogLevel.Debug, etc. - - Example: - transport.set_log_level(DailyLogLevel.Info) - """ - Daily.set_log_level(level) - - async def send_image(self, frame: OutputImageRawFrame | SpriteFrame): - """Send an image frame to the Daily call. - - Args: - frame: The image frame to send. - """ - if self._output: - await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - - async def send_audio(self, frame: OutputAudioRawFrame): - """Send an audio frame to the Daily call. - - Args: - frame: The audio frame to send. - """ - if self._output: - await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - - def participants(self): - """Get current participants in the room. - - Returns: - Dictionary of participants keyed by participant ID. - """ - return self._client.participants() - - def participant_counts(self): - """Get participant count information. - - Returns: - Dictionary with participant count details. - """ - return self._client.participant_counts() - - async def start_dialout(self, settings=None): - """Start a dial-out call to a phone number. - - Args: - settings: Dial-out configuration settings. 
- """ - await self._client.start_dialout(settings) - - async def stop_dialout(self, participant_id): - """Stop a dial-out call for a specific participant. - - Args: - participant_id: ID of the participant to stop dial-out for. - """ - await self._client.stop_dialout(participant_id) - - async def sip_call_transfer(self, settings): - """Transfer a SIP call to another destination. - - Args: - settings: SIP call transfer settings. - """ - await self._client.sip_call_transfer(settings) - - async def sip_refer(self, settings): - """Send a SIP REFER request. - - Args: - settings: SIP REFER settings. - """ - await self._client.sip_refer(settings) - - async def start_recording(self, streaming_settings=None, stream_id=None, force_new=None): - """Start recording the call. - - Args: - streaming_settings: Recording configuration settings. - stream_id: Unique identifier for the recording stream. - force_new: Whether to force a new recording session. - """ - await self._client.start_recording(streaming_settings, stream_id, force_new) - - async def stop_recording(self, stream_id=None): - """Stop recording the call. - - Args: - stream_id: Unique identifier for the recording stream to stop. - """ - await self._client.stop_recording(stream_id) - - async def start_transcription(self, settings=None): - """Start transcription for the call. - - Args: - settings: Transcription configuration settings. - """ - await self._client.start_transcription(settings) - - async def stop_transcription(self): - """Stop transcription for the call.""" - await self._client.stop_transcription() - - async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None): - """Send a chat message to Daily's Prebuilt main room. - - Args: - message: The chat message to send. - user_name: Optional user name that will appear as sender of the message. 
- """ - await self._client.send_prebuilt_chat_message(message, user_name) - - async def capture_participant_transcription(self, participant_id: str): - """Enable transcription capture for a specific participant. - - Args: - participant_id: ID of the participant to capture transcription for. - """ - await self._client.capture_participant_transcription(participant_id) - - async def capture_participant_audio( - self, - participant_id: str, - audio_source: str = "microphone", - sample_rate: int = 16000, - ): - """Capture audio from a specific participant. - - Args: - participant_id: ID of the participant to capture audio from. - audio_source: Audio source to capture from. - sample_rate: Desired sample rate for audio capture. - """ - if self._input: - await self._input.capture_participant_audio(participant_id, audio_source, sample_rate) - - async def capture_participant_video( - self, - participant_id: str, - framerate: int = 30, - video_source: str = "camera", - color_format: str = "RGB", - ): - """Capture video from a specific participant. - - Args: - participant_id: ID of the participant to capture video from. - framerate: Desired framerate for video capture. - video_source: Video source to capture from. - color_format: Color format for video frames. - """ - if self._input: - await self._input.capture_participant_video( - participant_id, framerate, video_source, color_format - ) - - async def update_publishing(self, publishing_settings: Mapping[str, Any]): - """Update media publishing settings. - - Args: - publishing_settings: Publishing configuration settings. - """ - await self._client.update_publishing(publishing_settings=publishing_settings) - - async def update_subscriptions(self, participant_settings=None, profile_settings=None): - """Update media subscription settings. - - Args: - participant_settings: Per-participant subscription settings. - profile_settings: Global subscription profile settings. 
- """ - await self._client.update_subscriptions( - participant_settings=participant_settings, profile_settings=profile_settings - ) - - async def update_remote_participants(self, remote_participants: Mapping[str, Any]): - """Update settings for remote participants. - - Args: - remote_participants: Remote participant configuration settings. - """ - await self._client.update_remote_participants(remote_participants=remote_participants) - - async def _on_active_speaker_changed(self, participant: Any): - """Handle active speaker change events.""" - await self._call_event_handler("on_active_speaker_changed", participant) - - async def _on_joined(self, data): - """Handle room joined events.""" - await self._call_event_handler("on_joined", data) - - async def _on_left(self): - """Handle room left events.""" - await self._call_event_handler("on_left") - - async def _on_error(self, error): - """Handle error events and push error frames.""" - await self._call_event_handler("on_error", error) - # Push error frame to notify the pipeline - error_frame = ErrorFrame(error) - - if self._input: - await self._input.push_error(error_frame) - elif self._output: - await self._output.push_error(error_frame) - else: - logger.error("Both input and output are None while trying to push error") - raise Exception("No valid input or output channel to push error") - - async def _on_app_message(self, message: Any, sender: str): - """Handle application message events.""" - if self._input: - await self._input.push_app_message(message, sender) - await self._call_event_handler("on_app_message", message, sender) - - async def _on_call_state_updated(self, state: str): - """Handle call state update events.""" - await self._call_event_handler("on_call_state_updated", state) - - async def _on_client_connected(self, participant: Any): - """Handle client connected events.""" - await self._call_event_handler("on_client_connected", participant) - - async def _on_client_disconnected(self, participant: Any): - 
"""Handle client disconnected events.""" - await self._call_event_handler("on_client_disconnected", participant) - - async def _handle_dialin_ready(self, sip_endpoint: str): - """Handle dial-in ready events by updating SIP configuration.""" - if not self._params.dialin_settings: - return - - async with aiohttp.ClientSession() as session: - headers = { - "Authorization": f"Bearer {self._params.api_key}", - "Content-Type": "application/json", - } - data = { - "callId": self._params.dialin_settings.call_id, - "callDomain": self._params.dialin_settings.call_domain, - "sipUri": sip_endpoint, - } - - url = f"{self._params.api_url}/dialin/pinlessCallUpdate" - - try: - async with session.post( - url, headers=headers, json=data, timeout=aiohttp.ClientTimeout(total=10) - ) as r: - if r.status != 200: - text = await r.text() - logger.error( - f"Unable to handle dialin-ready event (status: {r.status}, error: {text})" - ) - return - - logger.debug("Event dialin-ready was handled successfully") - except asyncio.TimeoutError: - logger.error(f"Timeout handling dialin-ready event ({url})") - except Exception as e: - logger.exception(f"Error handling dialin-ready event ({url}): {e}") - - async def _on_dialin_connected(self, data): - """Handle dial-in connected events.""" - await self._call_event_handler("on_dialin_connected", data) - - async def _on_dialin_ready(self, sip_endpoint): - """Handle dial-in ready events.""" - if self._params.dialin_settings: - await self._handle_dialin_ready(sip_endpoint) - await self._call_event_handler("on_dialin_ready", sip_endpoint) - - async def _on_dialin_stopped(self, data): - """Handle dial-in stopped events.""" - await self._call_event_handler("on_dialin_stopped", data) - - async def _on_dialin_error(self, data): - """Handle dial-in error events.""" - await self._call_event_handler("on_dialin_error", data) - - async def _on_dialin_warning(self, data): - """Handle dial-in warning events.""" - await self._call_event_handler("on_dialin_warning", 
data) - - async def _on_dialout_answered(self, data): - """Handle dial-out answered events.""" - await self._call_event_handler("on_dialout_answered", data) - - async def _on_dialout_connected(self, data): - """Handle dial-out connected events.""" - await self._call_event_handler("on_dialout_connected", data) - - async def _on_dialout_stopped(self, data): - """Handle dial-out stopped events.""" - await self._call_event_handler("on_dialout_stopped", data) - - async def _on_dialout_error(self, data): - """Handle dial-out error events.""" - await self._call_event_handler("on_dialout_error", data) - - async def _on_dialout_warning(self, data): - """Handle dial-out warning events.""" - await self._call_event_handler("on_dialout_warning", data) - - async def _on_participant_joined(self, participant): - """Handle participant joined events.""" - id = participant["id"] - logger.info(f"Participant joined {id}") - - if self._input and self._params.audio_in_enabled and self._params.audio_in_user_tracks: - await self._input.capture_participant_audio( - id, "microphone", self._client.in_sample_rate - ) - - if not self._other_participant_has_joined: - self._other_participant_has_joined = True - await self._call_event_handler("on_first_participant_joined", participant) - - await self._call_event_handler("on_participant_joined", participant) - # Also call on_client_connected for compatibility with other transports - await self._call_event_handler("on_client_connected", participant) - - async def _on_participant_left(self, participant, reason): - """Handle participant left events.""" - id = participant["id"] - logger.info(f"Participant left {id}") - await self._call_event_handler("on_participant_left", participant, reason) - # Also call on_client_disconnected for compatibility with other transports - await self._call_event_handler("on_client_disconnected", participant) - - async def _on_participant_updated(self, participant): - """Handle participant updated events.""" - await 
self._call_event_handler("on_participant_updated", participant) - - async def _on_transcription_message(self, message): - """Handle transcription message events.""" - await self._call_event_handler("on_transcription_message", message) - - participant_id = "" - if "participantId" in message: - participant_id = message["participantId"] - if not participant_id: - return - - text = message["text"] - timestamp = message["timestamp"] - is_final = message["rawResponse"]["is_final"] - try: - language = message["rawResponse"]["channel"]["alternatives"][0]["languages"][0] - language = Language(language) - except KeyError: - language = None - if is_final: - frame = TranscriptionFrame(text, participant_id, timestamp, language, result=message) - logger.debug(f"Transcription (from: {participant_id}): [{text}]") - else: - frame = InterimTranscriptionFrame( - text, - participant_id, - timestamp, - language, - result=message, - ) - - if self._input: - await self._input.push_transcription_frame(frame) - - async def _on_transcription_stopped(self, stopped_by, stopped_by_error): - """Handle transcription stopped events.""" - await self._call_event_handler("on_transcription_stopped", stopped_by, stopped_by_error) - - async def _on_transcription_error(self, message): - """Handle transcription error events.""" - await self._call_event_handler("on_transcription_error", message) - - async def _on_recording_started(self, status): - """Handle recording started events.""" - await self._call_event_handler("on_recording_started", status) - - async def _on_recording_stopped(self, stream_id): - """Handle recording stopped events.""" - await self._call_event_handler("on_recording_stopped", stream_id) - - async def _on_recording_error(self, stream_id, message): - """Handle recording error events.""" - await self._call_event_handler("on_recording_error", stream_id, message) diff --git a/src/pipecat/transports/services/helpers/daily_rest.py b/src/pipecat/transports/services/helpers/daily_rest.py 
index 2003b2743..1efcf22a3 100644 --- a/src/pipecat/transports/services/helpers/daily_rest.py +++ b/src/pipecat/transports/services/helpers/daily_rest.py @@ -9,402 +9,15 @@ Methods that wrap the Daily API to create rooms, check room URLs, and get meeting tokens. """ -import time -from typing import Dict, List, Literal, Optional -from urllib.parse import urlparse - -import aiohttp -from pydantic import BaseModel, Field, ValidationError - - -class DailyRoomSipParams(BaseModel): - """SIP configuration parameters for Daily rooms. - - Parameters: - display_name: Name shown for the SIP endpoint. - video: Whether video is enabled for SIP. - sip_mode: SIP connection mode, typically 'dial-in'. - num_endpoints: Number of allowed SIP endpoints. - codecs: Codecs to support for audio and video. If None, uses Daily defaults. - Example: {"audio": ["OPUS"], "video": ["H264"]} - """ - - display_name: str = "sw-sip-dialin" - video: bool = False - sip_mode: str = "dial-in" - num_endpoints: int = 1 - codecs: Optional[Dict[str, List[str]]] = None - - -class RecordingsBucketConfig(BaseModel): - """Configuration for storing Daily recordings in a custom S3 bucket. - - Refer to the Daily API documentation for more information: - https://docs.daily.co/guides/products/live-streaming-recording/storing-recordings-in-a-custom-s3-bucket - - Parameters: - bucket_name: Name of the S3 bucket for storing recordings. - bucket_region: AWS region where the S3 bucket is located. - assume_role_arn: ARN of the IAM role to assume for S3 access. - allow_api_access: Whether to allow API access to the recordings. - """ - - bucket_name: str - bucket_region: str - assume_role_arn: str - allow_api_access: bool = False - - -class TranscriptionBucketConfig(BaseModel): - """Configuration for storing Daily transcription in a custom S3 bucket. 
- - Refer to the Daily API documentation for more information: - https://docs.daily.co/guides/products/live-streaming-recording/storing-recordings-in-a-custom-s3-bucket - - Parameters: - bucket_name: Name of the S3 bucket for storing transcription. - bucket_region: AWS region where the S3 bucket is located. - assume_role_arn: ARN of the IAM role to assume for S3 access. - allow_api_access: Whether to allow API access to the transcription. - """ - - bucket_name: str - bucket_region: str - assume_role_arn: str - allow_api_access: bool = False - - -class DailyRoomProperties(BaseModel, extra="allow"): - """Properties for configuring a Daily room. - - Reference: https://docs.daily.co/reference/rest-api/rooms/create-room#properties - - Parameters: - exp: Optional Unix epoch timestamp for room expiration (e.g., time.time() + 300 for 5 minutes). - enable_chat: Whether chat is enabled in the room. - enable_prejoin_ui: Whether the pre-join UI is enabled. - enable_emoji_reactions: Whether emoji reactions are enabled. - eject_at_room_exp: Whether to remove participants when room expires. - enable_dialout: Whether SIP dial-out is enabled. - enable_recording: Recording settings ('cloud', 'local', 'raw-tracks'). - enable_transcription_storage: Whether transcription storage is enabled. - geo: Geographic region for room. - max_participants: Maximum number of participants allowed in the room. - recordings_bucket: Configuration for custom S3 bucket recordings. - transcription_bucket: Configuration for custom S3 bucket transcription. - sip: SIP configuration parameters. - sip_uri: SIP URI information returned by Daily. - start_video_off: Whether video is off by default. 
- """ - - exp: Optional[float] = None - enable_chat: bool = False - enable_prejoin_ui: bool = False - enable_emoji_reactions: bool = False - eject_at_room_exp: bool = False - enable_dialout: Optional[bool] = None - enable_recording: Optional[Literal["cloud", "local", "raw-tracks"]] = None - enable_transcription_storage: Optional[bool] = None - geo: Optional[str] = None - max_participants: Optional[int] = None - recordings_bucket: Optional[RecordingsBucketConfig] = None - transcription_bucket: Optional[TranscriptionBucketConfig] = None - sip: Optional[DailyRoomSipParams] = None - sip_uri: Optional[dict] = None - start_video_off: bool = False - - @property - def sip_endpoint(self) -> str: - """Get the SIP endpoint URI if available. - - Returns: - SIP endpoint URI or empty string if not available. - """ - if not self.sip_uri: - return "" - else: - return "sip:%s" % self.sip_uri["endpoint"] - - -class DailyRoomParams(BaseModel): - """Parameters for creating a Daily room. - - Parameters: - name: Optional custom name for the room. - privacy: Room privacy setting ('private' or 'public'). - properties: Room configuration properties. - """ - - name: Optional[str] = None - privacy: Literal["private", "public"] = "public" - properties: DailyRoomProperties = Field(default_factory=DailyRoomProperties) - - -class DailyRoomObject(BaseModel): - """Represents a Daily room returned by the API. - - Parameters: - id: Unique room identifier. - name: Room name. - api_created: Whether room was created via API. - privacy: Room privacy setting ('private' or 'public'). - url: Full URL for joining the room. - created_at: Timestamp of room creation in ISO 8601 format (e.g., "2019-01-26T09:01:22.000Z"). - config: Room configuration properties. - """ - - id: str - name: str - api_created: bool - privacy: str - url: str - created_at: str - config: DailyRoomProperties - - -class DailyMeetingTokenProperties(BaseModel): - """Properties for configuring a Daily meeting token. 
- - Refer to the Daily API documentation for more information: - https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#properties - - Parameters: - room_name: The room for which this token is valid. If not set, the token is valid for all rooms in your domain. - eject_at_token_exp: If True, the user will be ejected from the room when the token expires. - eject_after_elapsed: The number of seconds after which the user will be ejected from the room. - nbf: Not before timestamp - users cannot join with this token before this time. - exp: Expiration time (unix timestamp in seconds). Strongly recommended for security. - is_owner: If True, the token will grant owner privileges in the room. - user_name: The name of the user. This will be added to the token payload. - user_id: A unique identifier for the user. This will be added to the token payload. - enable_screenshare: If True, the user will be able to share their screen. - start_video_off: If True, the user's video will be turned off when they join the room. - start_audio_off: If True, the user's audio will be turned off when they join the room. - enable_recording: Recording settings for the token. Must be one of 'cloud', 'local' or 'raw-tracks'. - enable_prejoin_ui: If True, the user will see the prejoin UI before joining the room. - start_cloud_recording: Start cloud recording when the user joins the room. - permissions: Specifies the initial default permissions for a non-meeting-owner participant. 
- """ - - room_name: Optional[str] = None - eject_at_token_exp: Optional[bool] = None - eject_after_elapsed: Optional[int] = None - nbf: Optional[int] = None - exp: Optional[int] = None - is_owner: Optional[bool] = None - user_name: Optional[str] = None - user_id: Optional[str] = None - enable_screenshare: Optional[bool] = None - start_video_off: Optional[bool] = None - start_audio_off: Optional[bool] = None - enable_recording: Optional[Literal["cloud", "local", "raw-tracks"]] = None - enable_prejoin_ui: Optional[bool] = None - start_cloud_recording: Optional[bool] = None - permissions: Optional[dict] = None - - -class DailyMeetingTokenParams(BaseModel): - """Parameters for creating a Daily meeting token. - - Refer to the Daily API documentation for more information: - https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#body-params - - Parameters: - properties: Meeting token configuration properties. - """ - - properties: DailyMeetingTokenProperties = Field(default_factory=DailyMeetingTokenProperties) - - -class DailyRESTHelper: - """Helper class for interacting with Daily's REST API. - - Provides methods for creating, managing, and accessing Daily rooms. - """ - - def __init__( - self, - *, - daily_api_key: str, - daily_api_url: str = "https://api.daily.co/v1", - aiohttp_session: aiohttp.ClientSession, - ): - """Initialize the Daily REST helper. - - Args: - daily_api_key: Your Daily API key. - daily_api_url: Daily API base URL (e.g. "https://api.daily.co/v1"). - aiohttp_session: Async HTTP session for making requests. - """ - self.daily_api_key = daily_api_key - self.daily_api_url = daily_api_url - self.aiohttp_session = aiohttp_session - - def get_name_from_url(self, room_url: str) -> str: - """Extract room name from a Daily room URL. - - Args: - room_url: Full Daily room URL. - - Returns: - Room name portion of the URL. 
- """ - return urlparse(room_url).path[1:] - - async def get_room_from_url(self, room_url: str) -> DailyRoomObject: - """Get room details from a Daily room URL. - - Args: - room_url: Full Daily room URL. - - Returns: - DailyRoomObject instance for the room. - """ - room_name = self.get_name_from_url(room_url) - return await self._get_room_from_name(room_name) - - async def create_room(self, params: DailyRoomParams) -> DailyRoomObject: - """Create a new Daily room. - - Args: - params: Room configuration parameters. - - Returns: - DailyRoomObject instance for the created room. - - Raises: - Exception: If room creation fails or response is invalid. - """ - headers = {"Authorization": f"Bearer {self.daily_api_key}"} - json = params.model_dump(exclude_none=True) - async with self.aiohttp_session.post( - f"{self.daily_api_url}/rooms", headers=headers, json=json - ) as r: - if r.status != 200: - text = await r.text() - raise Exception(f"Unable to create room (status: {r.status}): {text}") - - data = await r.json() - - try: - room = DailyRoomObject(**data) - except ValidationError as e: - raise Exception(f"Invalid response: {e}") - - return room - - async def get_token( - self, - room_url: str, - expiry_time: float = 60 * 60, - eject_at_token_exp: bool = False, - owner: bool = True, - params: Optional[DailyMeetingTokenParams] = None, - ) -> str: - """Generate a meeting token for user to join a Daily room. - - Args: - room_url: Daily room URL. - expiry_time: Token validity duration in seconds (default: 1 hour). - eject_at_token_exp: Whether to eject user when token expires. - owner: Whether token has owner privileges. - params: Optional additional token properties. Note that room_name, - exp, and is_owner will be set based on the other function - parameters regardless of values in params. - - Returns: - Meeting token. - - Raises: - Exception: If token generation fails or room URL is missing. - """ - if not room_url: - raise Exception( - "No Daily room specified. 
You must specify a Daily room in order a token to be generated." - ) - - expiration: int = int(time.time() + expiry_time) - - room_name = self.get_name_from_url(room_url) - - headers = {"Authorization": f"Bearer {self.daily_api_key}"} - - if params is None: - params = DailyMeetingTokenParams( - properties=DailyMeetingTokenProperties( - room_name=room_name, - is_owner=owner, - exp=expiration, - eject_at_token_exp=eject_at_token_exp, - ) - ) - else: - params.properties.room_name = room_name - params.properties.exp = expiration - params.properties.eject_at_token_exp = eject_at_token_exp - params.properties.is_owner = owner - - json = params.model_dump(exclude_none=True) - - async with self.aiohttp_session.post( - f"{self.daily_api_url}/meeting-tokens", headers=headers, json=json - ) as r: - if r.status != 200: - text = await r.text() - raise Exception(f"Failed to create meeting token (status: {r.status}): {text}") - - data = await r.json() - - return data["token"] - - async def delete_room_by_url(self, room_url: str) -> bool: - """Delete a room using its URL. - - Args: - room_url: Daily room URL. - - Returns: - True if deletion was successful. - """ - room_name = self.get_name_from_url(room_url) - return await self.delete_room_by_name(room_name) - - async def delete_room_by_name(self, room_name: str) -> bool: - """Delete a room using its name. - - Args: - room_name: Name of the room to delete. - - Returns: - True if deletion was successful. - - Raises: - Exception: If deletion fails (excluding 404 Not Found). 
- """ - headers = {"Authorization": f"Bearer {self.daily_api_key}"} - async with self.aiohttp_session.delete( - f"{self.daily_api_url}/rooms/{room_name}", headers=headers - ) as r: - if r.status != 200 and r.status != 404: - text = await r.text() - raise Exception(f"Failed to delete room [{room_name}] (status: {r.status}): {text}") - - return True - - async def _get_room_from_name(self, room_name: str) -> DailyRoomObject: - """Internal method to get room details by name.""" - headers = {"Authorization": f"Bearer {self.daily_api_key}"} - async with self.aiohttp_session.get( - f"{self.daily_api_url}/rooms/{room_name}", headers=headers - ) as r: - if r.status != 200: - raise Exception(f"Room not found: {room_name}") - - data = await r.json() - - try: - room = DailyRoomObject(**data) - except ValidationError as e: - raise Exception(f"Invalid response: {e}") - - return room +import warnings + +from pipecat.transports.daily.utils import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.services.helpers.daily_rest` is deprecated, " + "use `pipecat.transports.daily.utils` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/services/livekit.py b/src/pipecat/transports/services/livekit.py index f21775cf6..8d9ff7427 100644 --- a/src/pipecat/transports/services/livekit.py +++ b/src/pipecat/transports/services/livekit.py @@ -11,978 +11,15 @@ including audio streaming, data messaging, participant management, and room event handling for conversational AI applications. 
""" -import asyncio -from dataclasses import dataclass -from typing import Any, Awaitable, Callable, List, Optional - -from loguru import logger -from pydantic import BaseModel - -from pipecat.audio.utils import create_stream_resampler -from pipecat.audio.vad.vad_analyzer import VADAnalyzer -from pipecat.frames.frames import ( - AudioRawFrame, - CancelFrame, - EndFrame, - OutputAudioRawFrame, - StartFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, - UserAudioRawFrame, -) -from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.utils.asyncio.task_manager import BaseTaskManager - -try: - from livekit import rtc - from tenacity import retry, stop_after_attempt, wait_exponential -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use LiveKit, you need to `pip install pipecat-ai[livekit]`.") - raise Exception(f"Missing module: {e}") - - -@dataclass -class LiveKitTransportMessageFrame(TransportMessageFrame): - """Frame for transport messages in LiveKit rooms. - - Parameters: - participant_id: Optional ID of the participant this message is for/from. - """ - - participant_id: Optional[str] = None - - -@dataclass -class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame): - """Frame for urgent transport messages in LiveKit rooms. - - Parameters: - participant_id: Optional ID of the participant this message is for/from. - """ - - participant_id: Optional[str] = None - - -class LiveKitParams(TransportParams): - """Configuration parameters for LiveKit transport. - - Inherits all parameters from TransportParams without additional configuration. - """ - - pass - - -class LiveKitCallbacks(BaseModel): - """Callback handlers for LiveKit events. 
- - Parameters: - on_connected: Called when connected to the LiveKit room. - on_disconnected: Called when disconnected from the LiveKit room. - on_participant_connected: Called when a participant joins the room. - on_participant_disconnected: Called when a participant leaves the room. - on_audio_track_subscribed: Called when an audio track is subscribed. - on_audio_track_unsubscribed: Called when an audio track is unsubscribed. - on_data_received: Called when data is received from a participant. - on_first_participant_joined: Called when the first participant joins. - """ - - on_connected: Callable[[], Awaitable[None]] - on_disconnected: Callable[[], Awaitable[None]] - on_participant_connected: Callable[[str], Awaitable[None]] - on_participant_disconnected: Callable[[str], Awaitable[None]] - on_audio_track_subscribed: Callable[[str], Awaitable[None]] - on_audio_track_unsubscribed: Callable[[str], Awaitable[None]] - on_data_received: Callable[[bytes, str], Awaitable[None]] - on_first_participant_joined: Callable[[str], Awaitable[None]] - - -class LiveKitTransportClient: - """Core client for interacting with LiveKit rooms. - - Manages the connection to LiveKit rooms and handles all low-level API interactions - including room management, audio streaming, data messaging, and event handling. - """ - - def __init__( - self, - url: str, - token: str, - room_name: str, - params: LiveKitParams, - callbacks: LiveKitCallbacks, - transport_name: str, - ): - """Initialize the LiveKit transport client. - - Args: - url: LiveKit server URL to connect to. - token: Authentication token for the room. - room_name: Name of the LiveKit room to join. - params: Configuration parameters for the transport. - callbacks: Event callback handlers. - transport_name: Name identifier for the transport. 
- """ - self._url = url - self._token = token - self._room_name = room_name - self._params = params - self._callbacks = callbacks - self._transport_name = transport_name - self._room: Optional[rtc.Room] = None - self._participant_id: str = "" - self._connected = False - self._disconnect_counter = 0 - self._audio_source: Optional[rtc.AudioSource] = None - self._audio_track: Optional[rtc.LocalAudioTrack] = None - self._audio_tracks = {} - self._audio_queue = asyncio.Queue() - self._other_participant_has_joined = False - self._task_manager: Optional[BaseTaskManager] = None - - @property - def participant_id(self) -> str: - """Get the participant ID for this client. - - Returns: - The participant ID assigned by LiveKit. - """ - return self._participant_id - - @property - def room(self) -> rtc.Room: - """Get the LiveKit room instance. - - Returns: - The LiveKit room object. - - Raises: - Exception: If room object is not available. - """ - if not self._room: - raise Exception(f"{self}: missing room object (pipeline not started?)") - return self._room - - async def setup(self, setup: FrameProcessorSetup): - """Setup the client with task manager and room initialization. - - Args: - setup: The frame processor setup configuration. 
- """ - if self._task_manager: - return - - self._task_manager = setup.task_manager - self._room = rtc.Room(loop=self._task_manager.get_event_loop()) - - # Set up room event handlers - self.room.on("participant_connected")(self._on_participant_connected_wrapper) - self.room.on("participant_disconnected")(self._on_participant_disconnected_wrapper) - self.room.on("track_subscribed")(self._on_track_subscribed_wrapper) - self.room.on("track_unsubscribed")(self._on_track_unsubscribed_wrapper) - self.room.on("data_received")(self._on_data_received_wrapper) - self.room.on("connected")(self._on_connected_wrapper) - self.room.on("disconnected")(self._on_disconnected_wrapper) - - async def cleanup(self): - """Cleanup client resources.""" - await self.disconnect() - - async def start(self, frame: StartFrame): - """Start the client and initialize audio components. - - Args: - frame: The start frame containing initialization parameters. - """ - self._out_sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate - - @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) - async def connect(self): - """Connect to the LiveKit room with retry logic.""" - if self._connected: - # Increment disconnect counter if already connected. - self._disconnect_counter += 1 - return - - logger.info(f"Connecting to {self._room_name}") - - try: - await self.room.connect( - self._url, - self._token, - options=rtc.RoomOptions(auto_subscribe=True), - ) - self._connected = True - # Increment disconnect counter if we successfully connected. 
- self._disconnect_counter += 1 - - self._participant_id = self.room.local_participant.sid - logger.info(f"Connected to {self._room_name}") - - # Set up audio source and track - self._audio_source = rtc.AudioSource( - self._out_sample_rate, self._params.audio_out_channels - ) - self._audio_track = rtc.LocalAudioTrack.create_audio_track( - "pipecat-audio", self._audio_source - ) - options = rtc.TrackPublishOptions() - options.source = rtc.TrackSource.SOURCE_MICROPHONE - await self.room.local_participant.publish_track(self._audio_track, options) - - await self._callbacks.on_connected() - - # Check if there are already participants in the room - participants = self.get_participants() - if participants and not self._other_participant_has_joined: - self._other_participant_has_joined = True - await self._callbacks.on_first_participant_joined(participants[0]) - except Exception as e: - logger.error(f"Error connecting to {self._room_name}: {e}") - raise - - async def disconnect(self): - """Disconnect from the LiveKit room.""" - # Decrement leave counter when leaving. - self._disconnect_counter -= 1 - - if not self._connected or self._disconnect_counter > 0: - return - - logger.info(f"Disconnecting from {self._room_name}") - await self.room.disconnect() - self._connected = False - logger.info(f"Disconnected from {self._room_name}") - await self._callbacks.on_disconnected() - - async def send_data(self, data: bytes, participant_id: Optional[str] = None): - """Send data to participants in the room. - - Args: - data: The data bytes to send. - participant_id: Optional specific participant to send to. 
- """ - if not self._connected: - return - - try: - if participant_id: - await self.room.local_participant.publish_data( - data, reliable=True, destination_identities=[participant_id] - ) - else: - await self.room.local_participant.publish_data(data, reliable=True) - except Exception as e: - logger.error(f"Error sending data: {e}") - - async def publish_audio(self, audio_frame: rtc.AudioFrame): - """Publish an audio frame to the room. - - Args: - audio_frame: The LiveKit audio frame to publish. - """ - if not self._connected or not self._audio_source: - return - - try: - await self._audio_source.capture_frame(audio_frame) - except Exception as e: - logger.error(f"Error publishing audio: {e}") - - def get_participants(self) -> List[str]: - """Get list of participant IDs in the room. - - Returns: - List of participant IDs. - """ - return [p.sid for p in self.room.remote_participants.values()] - - async def get_participant_metadata(self, participant_id: str) -> dict: - """Get metadata for a specific participant. - - Args: - participant_id: ID of the participant to get metadata for. - - Returns: - Dictionary containing participant metadata. - """ - participant = self.room.remote_participants.get(participant_id) - if participant: - return { - "id": participant.sid, - "name": participant.name, - "metadata": participant.metadata, - "is_speaking": participant.is_speaking, - } - return {} - - async def set_participant_metadata(self, metadata: str): - """Set metadata for the local participant. - - Args: - metadata: Metadata string to set. - """ - await self.room.local_participant.set_metadata(metadata) - - async def mute_participant(self, participant_id: str): - """Mute a specific participant's audio tracks. - - Args: - participant_id: ID of the participant to mute. 
- """ - participant = self.room.remote_participants.get(participant_id) - if participant: - for track in participant.tracks.values(): - if track.kind == "audio": - await track.set_enabled(False) - - async def unmute_participant(self, participant_id: str): - """Unmute a specific participant's audio tracks. - - Args: - participant_id: ID of the participant to unmute. - """ - participant = self.room.remote_participants.get(participant_id) - if participant: - for track in participant.tracks.values(): - if track.kind == "audio": - await track.set_enabled(True) - - # Wrapper methods for event handlers - def _on_participant_connected_wrapper(self, participant: rtc.RemoteParticipant): - """Wrapper for participant connected events.""" - self._task_manager.create_task( - self._async_on_participant_connected(participant), - f"{self}::_async_on_participant_connected", - ) - - def _on_participant_disconnected_wrapper(self, participant: rtc.RemoteParticipant): - """Wrapper for participant disconnected events.""" - self._task_manager.create_task( - self._async_on_participant_disconnected(participant), - f"{self}::_async_on_participant_disconnected", - ) - - def _on_track_subscribed_wrapper( - self, - track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant, - ): - """Wrapper for track subscribed events.""" - self._task_manager.create_task( - self._async_on_track_subscribed(track, publication, participant), - f"{self}::_async_on_track_subscribed", - ) - - def _on_track_unsubscribed_wrapper( - self, - track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant, - ): - """Wrapper for track unsubscribed events.""" - self._task_manager.create_task( - self._async_on_track_unsubscribed(track, publication, participant), - f"{self}::_async_on_track_unsubscribed", - ) - - def _on_data_received_wrapper(self, data: rtc.DataPacket): - """Wrapper for data received events.""" - self._task_manager.create_task( - 
self._async_on_data_received(data), - f"{self}::_async_on_data_received", - ) - - def _on_connected_wrapper(self): - """Wrapper for connected events.""" - self._task_manager.create_task(self._async_on_connected(), f"{self}::_async_on_connected") - - def _on_disconnected_wrapper(self): - """Wrapper for disconnected events.""" - self._task_manager.create_task( - self._async_on_disconnected(), f"{self}::_async_on_disconnected" - ) - - # Async methods for event handling - async def _async_on_participant_connected(self, participant: rtc.RemoteParticipant): - """Handle participant connected events.""" - logger.info(f"Participant connected: {participant.identity}") - await self._callbacks.on_participant_connected(participant.sid) - if not self._other_participant_has_joined: - self._other_participant_has_joined = True - await self._callbacks.on_first_participant_joined(participant.sid) - - async def _async_on_participant_disconnected(self, participant: rtc.RemoteParticipant): - """Handle participant disconnected events.""" - logger.info(f"Participant disconnected: {participant.identity}") - await self._callbacks.on_participant_disconnected(participant.sid) - if len(self.get_participants()) == 0: - self._other_participant_has_joined = False - - async def _async_on_track_subscribed( - self, - track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant, - ): - """Handle track subscribed events.""" - if track.kind == rtc.TrackKind.KIND_AUDIO: - logger.info(f"Audio track subscribed: {track.sid} from participant {participant.sid}") - self._audio_tracks[participant.sid] = track - audio_stream = rtc.AudioStream(track) - self._task_manager.create_task( - self._process_audio_stream(audio_stream, participant.sid), - f"{self}::_process_audio_stream", - ) - await self._callbacks.on_audio_track_subscribed(participant.sid) - - async def _async_on_track_unsubscribed( - self, - track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: 
rtc.RemoteParticipant, - ): - """Handle track unsubscribed events.""" - logger.info(f"Track unsubscribed: {publication.sid} from {participant.identity}") - if track.kind == rtc.TrackKind.KIND_AUDIO: - await self._callbacks.on_audio_track_unsubscribed(participant.sid) - - async def _async_on_data_received(self, data: rtc.DataPacket): - """Handle data received events.""" - await self._callbacks.on_data_received(data.data, data.participant.sid) - - async def _async_on_connected(self): - """Handle connected events.""" - await self._callbacks.on_connected() - - async def _async_on_disconnected(self, reason=None): - """Handle disconnected events.""" - self._connected = False - logger.info(f"Disconnected from {self._room_name}. Reason: {reason}") - await self._callbacks.on_disconnected() - - async def _process_audio_stream(self, audio_stream: rtc.AudioStream, participant_id: str): - """Process incoming audio stream from a participant.""" - logger.info(f"Started processing audio stream for participant {participant_id}") - async for event in audio_stream: - if isinstance(event, rtc.AudioFrameEvent): - await self._audio_queue.put((event, participant_id)) - else: - logger.warning(f"Received unexpected event type: {type(event)}") - - async def get_next_audio_frame(self): - """Get the next audio frame from the queue.""" - while True: - frame, participant_id = await self._audio_queue.get() - yield frame, participant_id - - def __str__(self): - """String representation of the LiveKit transport client.""" - return f"{self._transport_name}::LiveKitTransportClient" - - -class LiveKitInputTransport(BaseInputTransport): - """Handles incoming media streams and events from LiveKit rooms. - - Processes incoming audio streams from room participants and forwards them - as Pipecat frames, including audio resampling and VAD integration. 
- """ - - def __init__( - self, - transport: BaseTransport, - client: LiveKitTransportClient, - params: LiveKitParams, - **kwargs, - ): - """Initialize the LiveKit input transport. - - Args: - transport: The parent transport instance. - client: LiveKitTransportClient instance. - params: Configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._transport = transport - self._client = client - - self._audio_in_task = None - self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer - self._resampler = create_stream_resampler() - - # Whether we have seen a StartFrame already. - self._initialized = False - - @property - def vad_analyzer(self) -> Optional[VADAnalyzer]: - """Get the Voice Activity Detection analyzer. - - Returns: - The VAD analyzer instance if configured. - """ - return self._vad_analyzer - - async def start(self, frame: StartFrame): - """Start the input transport and connect to LiveKit room. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.start(frame) - await self._client.connect() - if not self._audio_in_task and self._params.audio_in_enabled: - self._audio_in_task = self.create_task(self._audio_in_task_handler()) - await self.set_transport_ready(frame) - logger.info("LiveKitInputTransport started") - - async def stop(self, frame: EndFrame): - """Stop the input transport and disconnect from LiveKit room. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._client.disconnect() - if self._audio_in_task: - await self.cancel_task(self._audio_in_task) - logger.info("LiveKitInputTransport stopped") - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport and disconnect from LiveKit room. - - Args: - frame: The cancel frame signaling immediate cancellation. 
- """ - await super().cancel(frame) - await self._client.disconnect() - if self._audio_in_task and self._params.audio_in_enabled: - await self.cancel_task(self._audio_in_task) - - async def setup(self, setup: FrameProcessorSetup): - """Setup the input transport with shared client setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup input transport and shared resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def push_app_message(self, message: Any, sender: str): - """Push an application message as an urgent transport frame. - - Args: - message: The message data to send. - sender: ID of the message sender. - """ - frame = LiveKitTransportMessageUrgentFrame(message=message, participant_id=sender) - await self.push_frame(frame) - - async def _audio_in_task_handler(self): - """Handle incoming audio frames from participants.""" - logger.info("Audio input task started") - audio_iterator = self._client.get_next_audio_frame() - async for audio_data in audio_iterator: - if audio_data: - audio_frame_event, participant_id = audio_data - pipecat_audio_frame = await self._convert_livekit_audio_to_pipecat( - audio_frame_event - ) - - # Skip frames with no audio data - if len(pipecat_audio_frame.audio) == 0: - continue - - input_audio_frame = UserAudioRawFrame( - user_id=participant_id, - audio=pipecat_audio_frame.audio, - sample_rate=pipecat_audio_frame.sample_rate, - num_channels=pipecat_audio_frame.num_channels, - ) - await self.push_audio_frame(input_audio_frame) - - async def _convert_livekit_audio_to_pipecat( - self, audio_frame_event: rtc.AudioFrameEvent - ) -> AudioRawFrame: - """Convert LiveKit audio frame to Pipecat audio frame.""" - audio_frame = audio_frame_event.frame - - audio_data = await self._resampler.resample( - audio_frame.data.tobytes(), audio_frame.sample_rate, self.sample_rate - ) - - return 
AudioRawFrame( - audio=audio_data, - sample_rate=self.sample_rate, - num_channels=audio_frame.num_channels, - ) - - -class LiveKitOutputTransport(BaseOutputTransport): - """Handles outgoing media streams and events to LiveKit rooms. - - Manages sending audio frames and data messages to LiveKit room participants, - including audio format conversion for LiveKit compatibility. - """ - - def __init__( - self, - transport: BaseTransport, - client: LiveKitTransportClient, - params: LiveKitParams, - **kwargs, - ): - """Initialize the LiveKit output transport. - - Args: - transport: The parent transport instance. - client: LiveKitTransportClient instance. - params: Configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._transport = transport - self._client = client - - # Whether we have seen a StartFrame already. - self._initialized = False - - async def start(self, frame: StartFrame): - """Start the output transport and connect to LiveKit room. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.start(frame) - await self._client.connect() - await self.set_transport_ready(frame) - logger.info("LiveKitOutputTransport started") - - async def stop(self, frame: EndFrame): - """Stop the output transport and disconnect from LiveKit room. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._client.disconnect() - logger.info("LiveKitOutputTransport stopped") - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport and disconnect from LiveKit room. - - Args: - frame: The cancel frame signaling immediate cancellation. 
- """ - await super().cancel(frame) - await self._client.disconnect() - - async def setup(self, setup: FrameProcessorSetup): - """Setup the output transport with shared client setup. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup output transport and shared resources.""" - await super().cleanup() - await self._transport.cleanup() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a transport message to participants. - - Args: - frame: The transport message frame to send. - """ - if isinstance(frame, (LiveKitTransportMessageFrame, LiveKitTransportMessageUrgentFrame)): - await self._client.send_data(frame.message.encode(), frame.participant_id) - else: - await self._client.send_data(frame.message.encode()) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the LiveKit room. - - Args: - frame: The audio frame to write. - """ - livekit_audio = self._convert_pipecat_audio_to_livekit(frame.audio) - await self._client.publish_audio(livekit_audio) - - def _convert_pipecat_audio_to_livekit(self, pipecat_audio: bytes) -> rtc.AudioFrame: - """Convert Pipecat audio data to LiveKit audio frame.""" - bytes_per_sample = 2 # Assuming 16-bit audio - total_samples = len(pipecat_audio) // bytes_per_sample - samples_per_channel = total_samples // self._params.audio_out_channels - - return rtc.AudioFrame( - data=pipecat_audio, - sample_rate=self.sample_rate, - num_channels=self._params.audio_out_channels, - samples_per_channel=samples_per_channel, - ) - - -class LiveKitTransport(BaseTransport): - """Transport implementation for LiveKit real-time communication. - - Provides comprehensive LiveKit integration including audio streaming, data - messaging, participant management, and room event handling for conversational - AI applications. 
- """ - - def __init__( - self, - url: str, - token: str, - room_name: str, - params: Optional[LiveKitParams] = None, - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the LiveKit transport. - - Args: - url: LiveKit server URL to connect to. - token: Authentication token for the room. - room_name: Name of the LiveKit room to join. - params: Configuration parameters for the transport. - input_name: Optional name for the input transport. - output_name: Optional name for the output transport. - """ - super().__init__(input_name=input_name, output_name=output_name) - - callbacks = LiveKitCallbacks( - on_connected=self._on_connected, - on_disconnected=self._on_disconnected, - on_participant_connected=self._on_participant_connected, - on_participant_disconnected=self._on_participant_disconnected, - on_audio_track_subscribed=self._on_audio_track_subscribed, - on_audio_track_unsubscribed=self._on_audio_track_unsubscribed, - on_data_received=self._on_data_received, - on_first_participant_joined=self._on_first_participant_joined, - ) - self._params = params or LiveKitParams() - - self._client = LiveKitTransportClient( - url, token, room_name, self._params, callbacks, self.name - ) - self._input: Optional[LiveKitInputTransport] = None - self._output: Optional[LiveKitOutputTransport] = None - - self._register_event_handler("on_connected") - self._register_event_handler("on_disconnected") - self._register_event_handler("on_participant_connected") - self._register_event_handler("on_participant_disconnected") - self._register_event_handler("on_audio_track_subscribed") - self._register_event_handler("on_audio_track_unsubscribed") - self._register_event_handler("on_data_received") - self._register_event_handler("on_first_participant_joined") - self._register_event_handler("on_participant_left") - self._register_event_handler("on_call_state_updated") - - def input(self) -> LiveKitInputTransport: - """Get the input transport for receiving 
media and events. - - Returns: - The LiveKit input transport instance. - """ - if not self._input: - self._input = LiveKitInputTransport( - self, self._client, self._params, name=self._input_name - ) - return self._input - - def output(self) -> LiveKitOutputTransport: - """Get the output transport for sending media and events. - - Returns: - The LiveKit output transport instance. - """ - if not self._output: - self._output = LiveKitOutputTransport( - self, self._client, self._params, name=self._output_name - ) - return self._output - - @property - def participant_id(self) -> str: - """Get the participant ID for this transport. - - Returns: - The participant ID assigned by LiveKit. - """ - return self._client.participant_id - - async def send_audio(self, frame: OutputAudioRawFrame): - """Send an audio frame to the LiveKit room. - - Args: - frame: The audio frame to send. - """ - if self._output: - await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - - def get_participants(self) -> List[str]: - """Get list of participant IDs in the room. - - Returns: - List of participant IDs. - """ - return self._client.get_participants() - - async def get_participant_metadata(self, participant_id: str) -> dict: - """Get metadata for a specific participant. - - Args: - participant_id: ID of the participant to get metadata for. - - Returns: - Dictionary containing participant metadata. - """ - return await self._client.get_participant_metadata(participant_id) - - async def set_metadata(self, metadata: str): - """Set metadata for the local participant. - - Args: - metadata: Metadata string to set. - """ - await self._client.set_participant_metadata(metadata) - - async def mute_participant(self, participant_id: str): - """Mute a specific participant's audio tracks. - - Args: - participant_id: ID of the participant to mute. 
- """ - await self._client.mute_participant(participant_id) - - async def unmute_participant(self, participant_id: str): - """Unmute a specific participant's audio tracks. - - Args: - participant_id: ID of the participant to unmute. - """ - await self._client.unmute_participant(participant_id) - - async def _on_connected(self): - """Handle room connected events.""" - await self._call_event_handler("on_connected") - - async def _on_disconnected(self): - """Handle room disconnected events.""" - await self._call_event_handler("on_disconnected") - - async def _on_participant_connected(self, participant_id: str): - """Handle participant connected events.""" - await self._call_event_handler("on_participant_connected", participant_id) - - async def _on_participant_disconnected(self, participant_id: str): - """Handle participant disconnected events.""" - await self._call_event_handler("on_participant_disconnected", participant_id) - await self._call_event_handler("on_participant_left", participant_id, "disconnected") - - async def _on_audio_track_subscribed(self, participant_id: str): - """Handle audio track subscribed events.""" - await self._call_event_handler("on_audio_track_subscribed", participant_id) - participant = self._client.room.remote_participants.get(participant_id) - if participant: - for publication in participant.audio_tracks.values(): - self._client._on_track_subscribed_wrapper( - publication.track, publication, participant - ) - - async def _on_audio_track_unsubscribed(self, participant_id: str): - """Handle audio track unsubscribed events.""" - await self._call_event_handler("on_audio_track_unsubscribed", participant_id) - - async def _on_data_received(self, data: bytes, participant_id: str): - """Handle data received events.""" - if self._input: - await self._input.push_app_message(data.decode(), participant_id) - await self._call_event_handler("on_data_received", data, participant_id) - - async def send_message(self, message: str, participant_id: 
Optional[str] = None): - """Send a message to participants in the room. - - Args: - message: The message string to send. - participant_id: Optional specific participant to send to. - """ - if self._output: - frame = LiveKitTransportMessageFrame(message=message, participant_id=participant_id) - await self._output.send_message(frame) - - async def send_message_urgent(self, message: str, participant_id: Optional[str] = None): - """Send an urgent message to participants in the room. - - Args: - message: The urgent message string to send. - participant_id: Optional specific participant to send to. - """ - if self._output: - frame = LiveKitTransportMessageUrgentFrame( - message=message, participant_id=participant_id - ) - await self._output.send_message(frame) - - async def on_room_event(self, event): - """Handle room events. - - Args: - event: The room event to handle. - """ - # Handle room events - pass - - async def on_participant_event(self, event): - """Handle participant events. - - Args: - event: The participant event to handle. - """ - # Handle participant events - pass - - async def on_track_event(self, event): - """Handle track events. - - Args: - event: The track event to handle. 
- """ - # Handle track events - pass - - async def _on_call_state_updated(self, state: str): - """Handle call state update events.""" - await self._call_event_handler("on_call_state_updated", self, state) - - async def _on_first_participant_joined(self, participant_id: str): - """Handle first participant joined events.""" - await self._call_event_handler("on_first_participant_joined", participant_id) +import warnings + +from pipecat.transports.livekit.transport import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.services.livekit` is deprecated, " + "use `pipecat.transports.livekit.transport` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/services/tavus.py b/src/pipecat/transports/services/tavus.py index e4e57717a..c37a01a2e 100644 --- a/src/pipecat/transports/services/tavus.py +++ b/src/pipecat/transports/services/tavus.py @@ -11,760 +11,15 @@ AI applications with avatars. It manages conversation sessions and provides real audio/video streaming capabilities through the Tavus API. 
""" -import os -from functools import partial -from typing import Any, Awaitable, Callable, Mapping, Optional - -import aiohttp -from daily.daily import AudioData -from loguru import logger -from pydantic import BaseModel - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - Frame, - InputAudioRawFrame, - OutputAudioRawFrame, - StartFrame, - StartInterruptionFrame, - TransportMessageFrame, - TransportMessageUrgentFrame, -) -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup -from pipecat.transports.base_input import BaseInputTransport -from pipecat.transports.base_output import BaseOutputTransport -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.services.daily import ( - DailyCallbacks, - DailyParams, - DailyTransportClient, -) - - -class TavusApi: - """Helper class for interacting with the Tavus API (v2). - - Provides methods for creating and managing conversations with Tavus avatars, - including conversation lifecycle management and persona information retrieval. - """ - - BASE_URL = "https://tavusapi.com/v2" - MOCK_CONVERSATION_ID = "dev-conversation" - MOCK_PERSONA_NAME = "TestTavusTransport" - - def __init__(self, api_key: str, session: aiohttp.ClientSession): - """Initialize the TavusApi client. - - Args: - api_key: Tavus API key for authentication. - session: An aiohttp session for making HTTP requests. - """ - self._api_key = api_key - self._session = session - self._headers = {"Content-Type": "application/json", "x-api-key": self._api_key} - # Only for development - self._dev_room_url = os.getenv("TAVUS_SAMPLE_ROOM_URL") - - async def create_conversation(self, replica_id: str, persona_id: str) -> dict: - """Create a new conversation with the specified replica and persona. - - Args: - replica_id: ID of the replica to use in the conversation. - persona_id: ID of the persona to use in the conversation. 
- - Returns: - Dictionary containing conversation_id and conversation_url. - """ - if self._dev_room_url: - return { - "conversation_id": self.MOCK_CONVERSATION_ID, - "conversation_url": self._dev_room_url, - } - - logger.debug(f"Creating Tavus conversation: replica={replica_id}, persona={persona_id}") - url = f"{self.BASE_URL}/conversations" - payload = { - "replica_id": replica_id, - "persona_id": persona_id, - } - async with self._session.post(url, headers=self._headers, json=payload) as r: - r.raise_for_status() - response = await r.json() - logger.debug(f"Created Tavus conversation: {response}") - return response - - async def end_conversation(self, conversation_id: str): - """End an existing conversation. - - Args: - conversation_id: ID of the conversation to end. - """ - if conversation_id is None or conversation_id == self.MOCK_CONVERSATION_ID: - return - - url = f"{self.BASE_URL}/conversations/{conversation_id}/end" - async with self._session.post(url, headers=self._headers) as r: - r.raise_for_status() - logger.debug(f"Ended Tavus conversation {conversation_id}") - - async def get_persona_name(self, persona_id: str) -> str: - """Get the name of a persona by ID. - - Args: - persona_id: ID of the persona to retrieve. - - Returns: - The name of the persona. - """ - if self._dev_room_url is not None: - return self.MOCK_PERSONA_NAME - - url = f"{self.BASE_URL}/personas/{persona_id}" - async with self._session.get(url, headers=self._headers) as r: - r.raise_for_status() - response = await r.json() - logger.debug(f"Fetched Tavus persona: {response}") - return response["persona_name"] - - -class TavusCallbacks(BaseModel): - """Callback handlers for Tavus events. - - Parameters: - on_participant_joined: Called when a participant joins the conversation. - on_participant_left: Called when a participant leaves the conversation. 
- """ - - on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] - on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] - - -class TavusParams(DailyParams): - """Configuration parameters for the Tavus transport. - - Parameters: - audio_in_enabled: Whether to enable audio input from participants. - audio_out_enabled: Whether to enable audio output to participants. - microphone_out_enabled: Whether to enable microphone output track. - """ - - audio_in_enabled: bool = True - audio_out_enabled: bool = True - microphone_out_enabled: bool = False - - -class TavusTransportClient: - """Transport client that integrates Pipecat with the Tavus platform. - - A transport client that integrates a Pipecat Bot with the Tavus platform by managing - conversation sessions using the Tavus API. - - This client uses `TavusApi` to interact with the Tavus backend services. When a conversation - is started via `TavusApi`, Tavus provides a `roomURL` that can be used to connect the Pipecat Bot - into the same virtual room where the TavusBot is operating. - """ - - def __init__( - self, - *, - bot_name: str, - params: TavusParams = TavusParams(), - callbacks: TavusCallbacks, - api_key: str, - replica_id: str, - persona_id: str = "pipecat-stream", - session: aiohttp.ClientSession, - ) -> None: - """Initialize the Tavus transport client. - - Args: - bot_name: The name of the Pipecat bot instance. - params: Optional parameters for Tavus operation. - callbacks: Callback handlers for Tavus-related events. - api_key: API key for authenticating with Tavus API. - replica_id: ID of the replica to use in the Tavus conversation. - persona_id: ID of the Tavus persona. Defaults to "pipecat-stream", - which signals Tavus to use the TTS voice of the Pipecat bot - instead of a Tavus persona voice. - session: The aiohttp session for making async HTTP requests. 
- """ - self._bot_name = bot_name - self._api = TavusApi(api_key, session) - self._replica_id = replica_id - self._persona_id = persona_id - self._conversation_id: Optional[str] = None - self._client: Optional[DailyTransportClient] = None - self._callbacks = callbacks - self._params = params - - async def _initialize(self) -> str: - """Initialize the conversation and return the room URL.""" - response = await self._api.create_conversation(self._replica_id, self._persona_id) - self._conversation_id = response["conversation_id"] - return response["conversation_url"] - - async def setup(self, setup: FrameProcessorSetup): - """Setup the client and initialize the conversation. - - Args: - setup: The frame processor setup configuration. - """ - if self._conversation_id is not None: - logger.debug(f"Conversation ID already defined: {self._conversation_id}") - return - try: - room_url = await self._initialize() - daily_callbacks = DailyCallbacks( - on_active_speaker_changed=partial( - self._on_handle_callback, "on_active_speaker_changed" - ), - on_joined=self._on_joined, - on_left=self._on_left, - on_error=partial(self._on_handle_callback, "on_error"), - on_app_message=partial(self._on_handle_callback, "on_app_message"), - on_call_state_updated=partial(self._on_handle_callback, "on_call_state_updated"), - on_client_connected=partial(self._on_handle_callback, "on_client_connected"), - on_client_disconnected=partial(self._on_handle_callback, "on_client_disconnected"), - on_dialin_connected=partial(self._on_handle_callback, "on_dialin_connected"), - on_dialin_ready=partial(self._on_handle_callback, "on_dialin_ready"), - on_dialin_stopped=partial(self._on_handle_callback, "on_dialin_stopped"), - on_dialin_error=partial(self._on_handle_callback, "on_dialin_error"), - on_dialin_warning=partial(self._on_handle_callback, "on_dialin_warning"), - on_dialout_answered=partial(self._on_handle_callback, "on_dialout_answered"), - on_dialout_connected=partial(self._on_handle_callback, 
"on_dialout_connected"), - on_dialout_stopped=partial(self._on_handle_callback, "on_dialout_stopped"), - on_dialout_error=partial(self._on_handle_callback, "on_dialout_error"), - on_dialout_warning=partial(self._on_handle_callback, "on_dialout_warning"), - on_participant_joined=self._callbacks.on_participant_joined, - on_participant_left=self._callbacks.on_participant_left, - on_participant_updated=partial(self._on_handle_callback, "on_participant_updated"), - on_transcription_message=partial( - self._on_handle_callback, "on_transcription_message" - ), - on_recording_started=partial(self._on_handle_callback, "on_recording_started"), - on_recording_stopped=partial(self._on_handle_callback, "on_recording_stopped"), - on_recording_error=partial(self._on_handle_callback, "on_recording_error"), - on_transcription_stopped=partial( - self._on_handle_callback, "on_transcription_stopped" - ), - on_transcription_error=partial(self._on_handle_callback, "on_transcription_error"), - ) - self._client = DailyTransportClient( - room_url, None, "Pipecat", self._params, daily_callbacks, self._bot_name - ) - await self._client.setup(setup) - except Exception as e: - logger.error(f"Failed to setup TavusTransportClient: {e}") - await self._api.end_conversation(self._conversation_id) - self._conversation_id = None - - async def cleanup(self): - """Cleanup client resources.""" - try: - await self._client.cleanup() - except Exception as e: - logger.exception(f"Exception during cleanup: {e}") - - async def _on_joined(self, data): - """Handle joined event.""" - logger.debug("TavusTransportClient joined!") - - async def _on_left(self): - """Handle left event.""" - logger.debug("TavusTransportClient left!") - - async def _on_handle_callback(self, event_name, *args, **kwargs): - """Handle generic callback events.""" - logger.trace(f"[Callback] {event_name} called with args={args}, kwargs={kwargs}") - - async def get_persona_name(self) -> str: - """Get the persona name from the API. 
- - Returns: - The name of the current persona. - """ - return await self._api.get_persona_name(self._persona_id) - - async def start(self, frame: StartFrame): - """Start the client and join the room. - - Args: - frame: The start frame containing initialization parameters. - """ - logger.debug("TavusTransportClient start invoked!") - await self._client.start(frame) - await self._client.join() - - async def stop(self): - """Stop the client and end the conversation.""" - await self._client.leave() - await self._api.end_conversation(self._conversation_id) - self._conversation_id = None - - async def capture_participant_video( - self, - participant_id: str, - callback: Callable, - framerate: int = 30, - video_source: str = "camera", - color_format: str = "RGB", - ): - """Capture video from a participant. - - Args: - participant_id: ID of the participant to capture video from. - callback: Callback function to handle video frames. - framerate: Desired framerate for video capture. - video_source: Video source to capture from. - color_format: Color format for video frames. - """ - await self._client.capture_participant_video( - participant_id, callback, framerate, video_source, color_format - ) - - async def capture_participant_audio( - self, - participant_id: str, - callback: Callable, - audio_source: str = "microphone", - sample_rate: int = 16000, - callback_interval_ms: int = 20, - ): - """Capture audio from a participant. - - Args: - participant_id: ID of the participant to capture audio from. - callback: Callback function to handle audio data. - audio_source: Audio source to capture from. - sample_rate: Desired sample rate for audio capture. - callback_interval_ms: Interval between audio callbacks in milliseconds. 
- """ - await self._client.capture_participant_audio( - participant_id, callback, audio_source, sample_rate, callback_interval_ms - ) - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a message to participants. - - Args: - frame: The message frame to send. - """ - await self._client.send_message(frame) - - @property - def out_sample_rate(self) -> int: - """Get the output sample rate. - - Returns: - The output sample rate in Hz. - """ - return self._client.out_sample_rate - - @property - def in_sample_rate(self) -> int: - """Get the input sample rate. - - Returns: - The input sample rate in Hz. - """ - return self._client.in_sample_rate - - async def send_interrupt_message(self) -> None: - """Send an interrupt message to the conversation.""" - transport_frame = TransportMessageUrgentFrame( - message={ - "message_type": "conversation", - "event_type": "conversation.interrupt", - "conversation_id": self._conversation_id, - } - ) - await self.send_message(transport_frame) - - async def update_subscriptions(self, participant_settings=None, profile_settings=None): - """Update subscription settings for participants. - - Args: - participant_settings: Per-participant subscription settings. - profile_settings: Global subscription profile settings. - """ - if not self._client: - return - - await self._client.update_subscriptions( - participant_settings=participant_settings, profile_settings=profile_settings - ) - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the transport. - - Args: - frame: The audio frame to write. - """ - if not self._client: - return - await self._client.write_audio_frame(frame) - - async def register_audio_destination(self, destination: str): - """Register an audio destination for output. - - Args: - destination: The destination identifier to register. 
- """ - if not self._client: - return - - await self._client.register_audio_destination(destination) - - -class TavusInputTransport(BaseInputTransport): - """Input transport for receiving audio and events from Tavus conversations. - - Handles incoming audio streams from participants and manages audio capture - from the Daily room connected to the Tavus conversation. - """ - - def __init__( - self, - client: TavusTransportClient, - params: TransportParams, - **kwargs, - ): - """Initialize the Tavus input transport. - - Args: - client: The Tavus transport client instance. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._client = client - self._params = params - # Whether we have seen a StartFrame already. - self._initialized = False - - async def setup(self, setup: FrameProcessorSetup): - """Setup the input transport. - - Args: - setup: The frame processor setup configuration. - """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup input transport resources.""" - await super().cleanup() - await self._client.cleanup() - - async def start(self, frame: StartFrame): - """Start the input transport. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.start(frame) - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the input transport. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._client.stop() - - async def cancel(self, frame: CancelFrame): - """Cancel the input transport. - - Args: - frame: The cancel frame signaling immediate cancellation. 
- """ - await super().cancel(frame) - await self._client.stop() - - async def start_capturing_audio(self, participant): - """Start capturing audio from a participant. - - Args: - participant: The participant to capture audio from. - """ - if self._params.audio_in_enabled: - logger.info( - f"TavusTransportClient start capturing audio for participant {participant['id']}" - ) - await self._client.capture_participant_audio( - participant_id=participant["id"], - callback=self._on_participant_audio_data, - sample_rate=self._client.in_sample_rate, - ) - - async def _on_participant_audio_data( - self, participant_id: str, audio: AudioData, audio_source: str - ): - """Handle received participant audio data.""" - frame = InputAudioRawFrame( - audio=audio.audio_frames, - sample_rate=audio.audio_frames, - num_channels=audio.num_channels, - ) - frame.transport_source = audio_source - await self.push_audio_frame(frame) - - -class TavusOutputTransport(BaseOutputTransport): - """Output transport for sending audio and events to Tavus conversations. - - Handles outgoing audio streams to participants and manages the custom - audio track expected by the Tavus platform. - """ - - def __init__( - self, - client: TavusTransportClient, - params: TransportParams, - **kwargs, - ): - """Initialize the Tavus output transport. - - Args: - client: The Tavus transport client instance. - params: Transport configuration parameters. - **kwargs: Additional arguments passed to parent class. - """ - super().__init__(params, **kwargs) - self._client = client - self._params = params - - # Whether we have seen a StartFrame already. - self._initialized = False - # This is the custom track destination expected by Tavus - self._transport_destination: Optional[str] = "stream" - - async def setup(self, setup: FrameProcessorSetup): - """Setup the output transport. - - Args: - setup: The frame processor setup configuration. 
- """ - await super().setup(setup) - await self._client.setup(setup) - - async def cleanup(self): - """Cleanup output transport resources.""" - await super().cleanup() - await self._client.cleanup() - - async def start(self, frame: StartFrame): - """Start the output transport. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - - if self._initialized: - return - - self._initialized = True - - await self._client.start(frame) - - if self._transport_destination: - await self._client.register_audio_destination(self._transport_destination) - - await self.set_transport_ready(frame) - - async def stop(self, frame: EndFrame): - """Stop the output transport. - - Args: - frame: The end frame signaling transport shutdown. - """ - await super().stop(frame) - await self._client.stop() - - async def cancel(self, frame: CancelFrame): - """Cancel the output transport. - - Args: - frame: The cancel frame signaling immediate cancellation. - """ - await super().cancel(frame) - await self._client.stop() - - async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): - """Send a message to participants. - - Args: - frame: The message frame to send. - """ - logger.info(f"TavusOutputTransport sending message {frame}") - await self._client.send_message(frame) - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames and handle interruptions. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): - await self._handle_interruptions() - - async def _handle_interruptions(self): - """Handle interruption events by sending interrupt message.""" - await self._client.send_interrupt_message() - - async def write_audio_frame(self, frame: OutputAudioRawFrame): - """Write an audio frame to the Tavus transport. 
- - Args: - frame: The audio frame to write. - """ - # This is the custom track destination expected by Tavus - frame.transport_destination = self._transport_destination - await self._client.write_audio_frame(frame) - - async def register_audio_destination(self, destination: str): - """Register an audio destination. - - Args: - destination: The destination identifier to register. - """ - await self._client.register_audio_destination(destination) - - -class TavusTransport(BaseTransport): - """Transport implementation for Tavus video calls. - - When used, the Pipecat bot joins the same virtual room as the Tavus Avatar and the user. - This is achieved by using `TavusTransportClient`, which initiates the conversation via - `TavusApi` and obtains a room URL that all participants connect to. - """ - - def __init__( - self, - bot_name: str, - session: aiohttp.ClientSession, - api_key: str, - replica_id: str, - persona_id: str = "pipecat-stream", - params: TavusParams = TavusParams(), - input_name: Optional[str] = None, - output_name: Optional[str] = None, - ): - """Initialize the Tavus transport. - - Args: - bot_name: The name of the Pipecat bot. - session: aiohttp session used for async HTTP requests. - api_key: Tavus API key for authentication. - replica_id: ID of the replica model used for voice generation. - persona_id: ID of the Tavus persona. Defaults to "pipecat-stream" - to use the Pipecat TTS voice. - params: Optional Tavus-specific configuration parameters. - input_name: Optional name for the input transport. - output_name: Optional name for the output transport. 
- """ - super().__init__(input_name=input_name, output_name=output_name) - self._params = params - - callbacks = TavusCallbacks( - on_participant_joined=self._on_participant_joined, - on_participant_left=self._on_participant_left, - ) - self._client = TavusTransportClient( - bot_name="Pipecat", - callbacks=callbacks, - api_key=api_key, - replica_id=replica_id, - persona_id=persona_id, - session=session, - params=params, - ) - self._input: Optional[TavusInputTransport] = None - self._output: Optional[TavusOutputTransport] = None - self._tavus_participant_id = None - - # Register supported handlers. The user will only be able to register - # these handlers. - self._register_event_handler("on_client_connected") - self._register_event_handler("on_client_disconnected") - - async def _on_participant_left(self, participant, reason): - """Handle participant left events.""" - persona_name = await self._client.get_persona_name() - if participant.get("info", {}).get("userName", "") != persona_name: - await self._on_client_disconnected(participant) - - async def _on_participant_joined(self, participant): - """Handle participant joined events.""" - # get persona, look up persona_name, set this as the bot name to ignore - persona_name = await self._client.get_persona_name() - - # Ignore the Tavus replica's microphone - if participant.get("info", {}).get("userName", "") == persona_name: - self._tavus_participant_id = participant["id"] - else: - await self._on_client_connected(participant) - if self._tavus_participant_id: - logger.debug(f"Ignoring {self._tavus_participant_id}'s microphone") - await self.update_subscriptions( - participant_settings={ - self._tavus_participant_id: { - "media": {"microphone": "unsubscribed"}, - } - } - ) - if self._input: - await self._input.start_capturing_audio(participant) - - async def update_subscriptions(self, participant_settings=None, profile_settings=None): - """Update subscription settings for participants. 
- - Args: - participant_settings: Per-participant subscription settings. - profile_settings: Global subscription profile settings. - """ - await self._client.update_subscriptions( - participant_settings=participant_settings, - profile_settings=profile_settings, - ) - - def input(self) -> FrameProcessor: - """Get the input transport for receiving media and events. - - Returns: - The Tavus input transport instance. - """ - if not self._input: - self._input = TavusInputTransport(client=self._client, params=self._params) - return self._input - - def output(self) -> FrameProcessor: - """Get the output transport for sending media and events. - - Returns: - The Tavus output transport instance. - """ - if not self._output: - self._output = TavusOutputTransport(client=self._client, params=self._params) - return self._output - - async def _on_client_connected(self, participant: Any): - """Handle client connected events.""" - await self._call_event_handler("on_client_connected", participant) - - async def _on_client_disconnected(self, participant: Any): - """Handle client disconnected events.""" - await self._call_event_handler("on_client_disconnected", participant) +import warnings + +from pipecat.transports.tavus.transport import * + +with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Module `pipecat.transports.services.tavus` is deprecated, " + "use `pipecat.transports.tavus.transport` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/pipecat/transports/smallwebrtc/__init__.py b/src/pipecat/transports/smallwebrtc/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/transports/smallwebrtc/connection.py b/src/pipecat/transports/smallwebrtc/connection.py new file mode 100644 index 000000000..420656f85 --- /dev/null +++ b/src/pipecat/transports/smallwebrtc/connection.py @@ -0,0 +1,612 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Small 
WebRTC connection implementation for Pipecat. + +This module provides a WebRTC connection implementation using aiortc, +with support for audio/video tracks, data channels, and signaling +for real-time communication applications. +""" + +import asyncio +import json +import time +from typing import Any, List, Literal, Optional, Union + +from loguru import logger +from pydantic import BaseModel, TypeAdapter + +from pipecat.utils.base_object import BaseObject + +try: + from aiortc import ( + MediaStreamTrack, + RTCConfiguration, + RTCIceServer, + RTCPeerConnection, + RTCSessionDescription, + ) + from aiortc.rtcrtpreceiver import RemoteStreamTrack + from av.frame import Frame +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error("In order to use the SmallWebRTC, you need to `pip install pipecat-ai[webrtc]`.") + raise Exception(f"Missing module: {e}") + +SIGNALLING_TYPE = "signalling" +AUDIO_TRANSCEIVER_INDEX = 0 +VIDEO_TRANSCEIVER_INDEX = 1 +SCREEN_VIDEO_TRANSCEIVER_INDEX = 2 + + +class TrackStatusMessage(BaseModel): + """Message for updating track enabled/disabled status. + + Parameters: + type: Message type identifier. + receiver_index: Index of the track receiver to update. + enabled: Whether the track should be enabled or disabled. + """ + + type: Literal["trackStatus"] + receiver_index: int + enabled: bool + + +class RenegotiateMessage(BaseModel): + """Message requesting WebRTC renegotiation. + + Parameters: + type: Message type identifier for renegotiation requests. + """ + + type: Literal["renegotiate"] = "renegotiate" + + +class PeerLeftMessage(BaseModel): + """Message indicating a peer has left the connection. + + Parameters: + type: Message type identifier for peer departure. + """ + + type: Literal["peerLeft"] = "peerLeft" + + +class SignallingMessage: + """Union types for signaling message handling. + + Parameters: + Inbound: Types of messages that can be received from peers. 
+ outbound: Types of messages that can be sent to peers. + """ + + Inbound = Union[TrackStatusMessage] # in case we need to add new messages in the future + outbound = Union[RenegotiateMessage] + + +class SmallWebRTCTrack: + """Wrapper for WebRTC media tracks with enabled/disabled state management. + + Provides additional functionality on top of aiortc MediaStreamTrack including + enable/disable control and frame discarding for audio and video streams. + """ + + def __init__(self, track: MediaStreamTrack): + """Initialize the WebRTC track wrapper. + + Args: + track: The underlying MediaStreamTrack to wrap. + index: The index of the track in the transceiver (0 for mic, 1 for cam, 2 for screen) + """ + self._track = track + self._enabled = True + + def set_enabled(self, enabled: bool) -> None: + """Enable or disable the track. + + Args: + enabled: Whether the track should be enabled for receiving frames. + """ + self._enabled = enabled + + def is_enabled(self) -> bool: + """Check if the track is currently enabled. + + Returns: + True if the track is enabled for receiving frames. + """ + return self._enabled + + async def discard_old_frames(self): + """Discard old frames from the track queue to reduce latency.""" + remote_track = self._track + if isinstance(remote_track, RemoteStreamTrack): + if not hasattr(remote_track, "_queue") or not isinstance( + remote_track._queue, asyncio.Queue + ): + print("Warning: _queue does not exist or has changed in aiortc.") + return + logger.debug("Discarding old frames") + while not remote_track._queue.empty(): + remote_track._queue.get_nowait() # Remove the oldest frame + remote_track._queue.task_done() + + async def recv(self) -> Optional[Frame]: + """Receive the next frame from the track. + + Returns: + The next frame, except for video tracks, where it returns the frame only if the track is enabled, otherwise, returns None. 
+ """ + if not self._enabled and self._track.kind == "video": + return None + return await self._track.recv() + + def __getattr__(self, name): + """Forward attribute access to the underlying track. + + Args: + name: The attribute name to access. + + Returns: + The attribute value from the underlying track. + """ + # Forward other attribute/method calls to the underlying track + return getattr(self._track, name) + + +# Alias so we don't need to expose RTCIceServer +IceServer = RTCIceServer + + +class SmallWebRTCConnection(BaseObject): + """WebRTC connection implementation using aiortc. + + Provides WebRTC peer connection functionality including ICE server configuration, + track management, data channel communication, and connection state handling + for real-time audio/video communication. + """ + + def __init__(self, ice_servers: Optional[Union[List[str], List[IceServer]]] = None): + """Initialize the WebRTC connection. + + Args: + ice_servers: List of ICE servers as URLs or IceServer objects. + + Raises: + TypeError: If ice_servers contains mixed types or unsupported types. + """ + super().__init__() + if not ice_servers: + self.ice_servers: List[IceServer] = [] + elif all(isinstance(s, IceServer) for s in ice_servers): + self.ice_servers = ice_servers + elif all(isinstance(s, str) for s in ice_servers): + self.ice_servers = [IceServer(urls=s) for s in ice_servers] + else: + raise TypeError("ice_servers must be either List[str] or List[RTCIceServer]") + self._connect_invoked = False + self._track_map = {} + self._track_getters = { + AUDIO_TRANSCEIVER_INDEX: self.audio_input_track, + VIDEO_TRANSCEIVER_INDEX: self.video_input_track, + SCREEN_VIDEO_TRANSCEIVER_INDEX: self.screen_video_input_track, + } + + self._initialize() + + # Register supported handlers. The user will only be able to register + # these handlers. 
+ self._register_event_handler("app-message") + self._register_event_handler("track-started") + self._register_event_handler("track-ended") + # connection states + self._register_event_handler("connecting") + self._register_event_handler("connected") + self._register_event_handler("disconnected") + self._register_event_handler("closed") + self._register_event_handler("failed") + self._register_event_handler("new") + + @property + def pc(self) -> RTCPeerConnection: + """Get the underlying RTCPeerConnection. + + Returns: + The aiortc RTCPeerConnection instance. + """ + return self._pc + + @property + def pc_id(self) -> str: + """Get the peer connection identifier. + + Returns: + The unique identifier for this peer connection. + """ + return self._pc_id + + def _initialize(self): + """Initialize the peer connection and associated components.""" + logger.debug("Initializing new peer connection") + rtc_config = RTCConfiguration(iceServers=self.ice_servers) + + self._answer: Optional[RTCSessionDescription] = None + self._pc = RTCPeerConnection(rtc_config) + self._pc_id = self.name + self._setup_listeners() + self._data_channel = None + self._renegotiation_in_progress = False + self._last_received_time = None + self._message_queue = [] + self._pending_app_messages = [] + + def _setup_listeners(self): + """Set up event listeners for the peer connection.""" + + @self._pc.on("datachannel") + def on_datachannel(channel): + self._data_channel = channel + + # Flush queued messages once the data channel is open + @channel.on("open") + async def on_open(): + logger.debug("Data channel is open, flushing queued messages") + while self._message_queue: + message = self._message_queue.pop(0) + self._data_channel.send(message) + + @channel.on("message") + async def on_message(message): + try: + # aiortc does not provide any way so we can be aware when we are disconnected, + # so we are using this keep alive message as a way to implement that + if isinstance(message, str) and 
message.startswith("ping"): + self._last_received_time = time.time() + else: + json_message = json.loads(message) + if json_message["type"] == SIGNALLING_TYPE and json_message.get("message"): + self._handle_signalling_message(json_message["message"]) + else: + if self.is_connected(): + await self._call_event_handler("app-message", json_message) + else: + logger.debug("Client not connected. Queuing app-message.") + self._pending_app_messages.append(json_message) + except Exception as e: + logger.exception(f"Error parsing JSON message {message}, {e}") + + # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" + # So, in case we loose connection, this event will not be triggered + @self._pc.on("connectionstatechange") + async def on_connectionstatechange(): + await self._handle_new_connection_state() + + # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" + # So, in case we loose connection, this event will not be triggered + @self._pc.on("iceconnectionstatechange") + async def on_iceconnectionstatechange(): + logger.debug( + f"ICE connection state is {self._pc.iceConnectionState}, connection is {self._pc.connectionState}" + ) + + @self._pc.on("icegatheringstatechange") + async def on_icegatheringstatechange(): + logger.debug(f"ICE gathering state is {self._pc.iceGatheringState}") + + @self._pc.on("track") + async def on_track(track): + logger.debug(f"Track {track.kind} received") + await self._call_event_handler("track-started", track) + + @track.on("ended") + async def on_ended(): + logger.debug(f"Track {track.kind} ended") + await self._call_event_handler("track-ended", track) + + async def _create_answer(self, sdp: str, type: str): + """Create an SDP answer for the given offer.""" + offer = RTCSessionDescription(sdp=sdp, type=type) + await self._pc.setRemoteDescription(offer) + + # For some reason, aiortc is not respecting the SDP for the transceivers to be sendrcv + # so we 
are basically forcing it to act this way + self.force_transceivers_to_send_recv() + + # this answer does not contain the ice candidates, which will be gathered later, after the setLocalDescription + logger.debug(f"Creating answer") + local_answer = await self._pc.createAnswer() + await self._pc.setLocalDescription(local_answer) + logger.debug(f"Setting the answer after the local description is created") + self._answer = self._pc.localDescription + + async def initialize(self, sdp: str, type: str): + """Initialize the connection with an SDP offer. + + Args: + sdp: The SDP offer string. + type: The SDP type (usually "offer"). + """ + await self._create_answer(sdp, type) + + async def connect(self): + """Connect the WebRTC peer connection and handle initial setup.""" + self._connect_invoked = True + # If we already connected, trigger again the connected event + if self.is_connected(): + await self._call_event_handler("connected") + logger.debug("Flushing pending app-messages") + for message in self._pending_app_messages: + await self._call_event_handler("app-message", message) + # We are renegotiating here, because likely we have loose the first video frames + # and aiortc does not handle that pretty well. + video_input_track = self.video_input_track() + if video_input_track: + await self.video_input_track().discard_old_frames() + screen_video_input_track = self.screen_video_input_track() + if screen_video_input_track: + await self.screen_video_input_track().discard_old_frames() + if video_input_track or screen_video_input_track: + # This prevents an issue where sometimes the WebRTC connection can be established + # before the bot is ready to receive video. When that happens, we can lose a couple + # of seconds of video before we received a key frame to finally start displaying it. + self.ask_to_renegotiate() + + async def renegotiate(self, sdp: str, type: str, restart_pc: bool = False): + """Renegotiate the WebRTC connection with new parameters. 
+ + Args: + sdp: The new SDP offer string. + type: The SDP type (usually "offer"). + restart_pc: Whether to restart the peer connection entirely. + """ + logger.debug(f"Renegotiating {self._pc_id}") + + if restart_pc: + await self._call_event_handler("disconnected") + logger.debug("Closing old peer connection") + # removing the listeners to prevent the bot from closing + self._pc.remove_all_listeners() + await self._close() + # we are initializing a new peer connection in this case. + self._initialize() + + await self._create_answer(sdp, type) + + # Maybe we should refactor to receive a message from the client side when the renegotiation is completed. + # or look at the peer connection listeners + # but this is good enough for now for testing. + async def delayed_task(): + await asyncio.sleep(2) + self._renegotiation_in_progress = False + + asyncio.create_task(delayed_task()) + + def force_transceivers_to_send_recv(self): + """Force all transceivers to bidirectional send/receive mode.""" + transceivers = self._pc.getTransceivers() + # For now, we only support sendrecv for camera audio and video (the first two transceivers) + for i, transceiver in enumerate(transceivers): + if i < 2: # First two transceivers (camera audio and video) + transceiver.direction = "sendrecv" + else: + transceiver.direction = "recvonly" + # logger.debug( + # f"Transceiver: {transceiver}, Mid: {transceiver.mid}, Direction: {transceiver.direction}" + # ) + # logger.debug(f"Sender track: {transceiver.sender.track}") + + def replace_audio_track(self, track): + """Replace the audio track in the first transceiver. + + Args: + track: The new audio track to use for sending. 
+ """ + logger.debug(f"Replacing audio track {track.kind}") + # Transceivers always appear in creation-order for both peers + # For now we are only considering that we are going to have 02 transceivers, + # one for audio and one for video + transceivers = self._pc.getTransceivers() + if len(transceivers) > 0 and transceivers[0].sender: + transceivers[0].sender.replaceTrack(track) + else: + logger.warning("Audio transceiver not found. Cannot replace audio track.") + + def replace_video_track(self, track): + """Replace the video track in the second transceiver. + + Args: + track: The new video track to use for sending. + """ + logger.debug(f"Replacing video track {track.kind}") + # Transceivers always appear in creation-order for both peers + # For now we are only considering that we are going to have 02 transceivers, + # one for audio and one for video + transceivers = self._pc.getTransceivers() + if len(transceivers) > 1 and transceivers[1].sender: + transceivers[1].sender.replaceTrack(track) + else: + logger.warning("Video transceiver not found. Cannot replace video track.") + + def replace_screen_video_track(self, track): + """Replace the screen video track in the second transceiver. + + Args: + track: The new screen video track to use for sending. + """ + logger.debug(f"Replacing screen video track {track.kind}") + # Transceivers always appear in creation-order for both peers + # For now we are only considering that we are going to have 02 transceivers, + # one for audio and one for video + transceivers = self._pc.getTransceivers() + if len(transceivers) > 2 and transceivers[2].sender: + transceivers[2].sender.replaceTrack(track) + else: + logger.warning("Screen video transceiver not found. 
Cannot replace screen video track.") + + async def disconnect(self): + """Disconnect from the WebRTC peer connection.""" + self.send_app_message({"type": SIGNALLING_TYPE, "message": PeerLeftMessage().model_dump()}) + await self._close() + + async def _close(self): + """Close the peer connection and cleanup resources.""" + if self._pc: + await self._pc.close() + self._message_queue.clear() + self._pending_app_messages.clear() + self._track_map = {} + + def get_answer(self): + """Get the SDP answer for the current connection. + + Returns: + Dictionary containing SDP answer, type, and peer connection ID, + or None if no answer is available. + """ + if not self._answer: + return None + + return { + "sdp": self._answer.sdp, + "type": self._answer.type, + "pc_id": self._pc_id, + } + + async def _handle_new_connection_state(self): + """Handle changes in the peer connection state.""" + state = self._pc.connectionState + if state == "connected" and not self._connect_invoked: + # We are going to wait until the pipeline is ready before triggering the event + return + logger.debug(f"Connection state changed to: {state}") + await self._call_event_handler(state) + if state == "failed": + logger.warning("Connection failed, closing peer connection.") + await self._close() + + # Despite the fact that aiortc provides this listener, they don't have a status for "disconnected" + # So, there is no advantage in looking at self._pc.connectionState + # That is why we are trying to keep our own state + def is_connected(self) -> bool: + """Check if the WebRTC connection is currently active. + + Returns: + True if the connection is active and receiving data. 
+ """ + # If the small webrtc transport has never invoked to connect + # we are acting like if we are not connected + if not self._connect_invoked: + return False + + if self._last_received_time is None: + # if we have never received a message, it is probably because the client has not created a data channel + # so we are going to trust aiortc in this case + return self._pc.connectionState == "connected" + # Checks if the last received ping was within the last 3 seconds. + return (time.time() - self._last_received_time) < 3 + + def audio_input_track(self): + """Get the audio input track wrapper. + + Returns: + SmallWebRTCTrack wrapper for the audio track, or None if unavailable. + """ + if self._track_map.get(AUDIO_TRANSCEIVER_INDEX): + return self._track_map[AUDIO_TRANSCEIVER_INDEX] + + # Transceivers always appear in creation-order for both peers + # For support 3 receivers in the following order: + # audio, video, screenVideo + transceivers = self._pc.getTransceivers() + if len(transceivers) == 0 or not transceivers[AUDIO_TRANSCEIVER_INDEX].receiver: + logger.warning("No audio transceiver is available") + return None + + track = transceivers[AUDIO_TRANSCEIVER_INDEX].receiver.track + audio_track = SmallWebRTCTrack(track) if track else None + self._track_map[AUDIO_TRANSCEIVER_INDEX] = audio_track + return audio_track + + def video_input_track(self): + """Get the video input track wrapper. + + Returns: + SmallWebRTCTrack wrapper for the video track, or None if unavailable. 
+ """ + if self._track_map.get(VIDEO_TRANSCEIVER_INDEX): + return self._track_map[VIDEO_TRANSCEIVER_INDEX] + + # Transceivers always appear in creation-order for both peers + # For support 3 receivers in the following order: + # audio, video, screenVideo + transceivers = self._pc.getTransceivers() + if len(transceivers) <= 1 or not transceivers[VIDEO_TRANSCEIVER_INDEX].receiver: + logger.warning("No video transceiver is available") + return None + + track = transceivers[VIDEO_TRANSCEIVER_INDEX].receiver.track + video_track = SmallWebRTCTrack(track) if track else None + self._track_map[VIDEO_TRANSCEIVER_INDEX] = video_track + return video_track + + def screen_video_input_track(self): + """Get the screen video input track wrapper. + + Returns: + SmallWebRTCTrack wrapper for the screen video track, or None if unavailable. + """ + if self._track_map.get(SCREEN_VIDEO_TRANSCEIVER_INDEX): + return self._track_map[SCREEN_VIDEO_TRANSCEIVER_INDEX] + + # Transceivers always appear in creation-order for both peers + # For support 3 receivers in the following order: + # audio, video, screenVideo + transceivers = self._pc.getTransceivers() + if len(transceivers) <= 2 or not transceivers[SCREEN_VIDEO_TRANSCEIVER_INDEX].receiver: + logger.warning("No screen video transceiver is available") + return None + + track = transceivers[SCREEN_VIDEO_TRANSCEIVER_INDEX].receiver.track + video_track = SmallWebRTCTrack(track) if track else None + self._track_map[SCREEN_VIDEO_TRANSCEIVER_INDEX] = video_track + return video_track + + def send_app_message(self, message: Any): + """Send an application message through the data channel. + + Args: + message: The message to send (will be JSON serialized). 
+ """ + json_message = json.dumps(message) + if self._data_channel and self._data_channel.readyState == "open": + self._data_channel.send(json_message) + else: + logger.debug("Data channel not ready, queuing message") + self._message_queue.append(json_message) + + def ask_to_renegotiate(self): + """Request renegotiation of the WebRTC connection.""" + if self._renegotiation_in_progress: + return + + self._renegotiation_in_progress = True + self.send_app_message( + {"type": SIGNALLING_TYPE, "message": RenegotiateMessage().model_dump()} + ) + + def _handle_signalling_message(self, message): + """Handle incoming signaling messages.""" + logger.debug(f"Signalling message received: {message}") + inbound_adapter = TypeAdapter(SignallingMessage.Inbound) + signalling_message = inbound_adapter.validate_python(message) + match signalling_message: + case TrackStatusMessage(): + track = ( + self._track_getters.get(signalling_message.receiver_index) or (lambda: None) + )() + if track: + track.set_enabled(signalling_message.enabled) diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py new file mode 100644 index 000000000..fd7a87f8c --- /dev/null +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -0,0 +1,935 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Small WebRTC transport implementation for Pipecat. + +This module provides a WebRTC transport implementation using aiortc for +real-time audio and video communication. It supports bidirectional media +streaming, application messaging, and client connection management. 
+""" + +import asyncio +import fractions +import time +from collections import deque +from typing import Any, Awaitable, Callable, Optional + +import numpy as np +from loguru import logger +from pydantic import BaseModel + +from pipecat.frames.frames import ( + CancelFrame, + EndFrame, + Frame, + InputAudioRawFrame, + OutputAudioRawFrame, + OutputImageRawFrame, + SpriteFrame, + StartFrame, + TransportMessageFrame, + TransportMessageUrgentFrame, + UserImageRawFrame, + UserImageRequestFrame, +) +from pipecat.processors.frame_processor import FrameDirection +from pipecat.transports.base_input import BaseInputTransport +from pipecat.transports.base_output import BaseOutputTransport +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection + +try: + import cv2 + from aiortc import VideoStreamTrack + from aiortc.mediastreams import AudioStreamTrack, MediaStreamError + from av import AudioFrame, AudioResampler, VideoFrame +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error("In order to use the SmallWebRTC, you need to `pip install pipecat-ai[webrtc]`.") + raise Exception(f"Missing module: {e}") + +CAM_VIDEO_SOURCE = "camera" +SCREEN_VIDEO_SOURCE = "screenVideo" +MIC_AUDIO_SOURCE = "microphone" + + +class SmallWebRTCCallbacks(BaseModel): + """Callback handlers for SmallWebRTC events. + + Parameters: + on_app_message: Called when an application message is received. + on_client_connected: Called when a client establishes connection. + on_client_disconnected: Called when a client disconnects. + """ + + on_app_message: Callable[[Any], Awaitable[None]] + on_client_connected: Callable[[SmallWebRTCConnection], Awaitable[None]] + on_client_disconnected: Callable[[SmallWebRTCConnection], Awaitable[None]] + + +class RawAudioTrack(AudioStreamTrack): + """Custom audio stream track for WebRTC output. 
+ + Handles audio frame generation and timing for WebRTC transmission, + supporting queued audio data with proper synchronization. + """ + + def __init__(self, sample_rate): + """Initialize the raw audio track. + + Args: + sample_rate: The audio sample rate in Hz. + """ + super().__init__() + self._sample_rate = sample_rate + self._samples_per_10ms = sample_rate * 10 // 1000 + self._bytes_per_10ms = self._samples_per_10ms * 2 # 16-bit (2 bytes per sample) + self._timestamp = 0 + self._start = time.time() + # Queue of (bytes, future), broken into 10ms sub chunks as needed + self._chunk_queue = deque() + + def add_audio_bytes(self, audio_bytes: bytes): + """Add audio bytes to the buffer for transmission. + + Args: + audio_bytes: Raw audio data to queue for transmission. + + Returns: + A Future that completes when the data is processed. + + Raises: + ValueError: If audio bytes are not a multiple of 10ms size. + """ + if len(audio_bytes) % self._bytes_per_10ms != 0: + raise ValueError("Audio bytes must be a multiple of 10ms size.") + future = asyncio.get_running_loop().create_future() + + # Break input into 10ms chunks + for i in range(0, len(audio_bytes), self._bytes_per_10ms): + chunk = audio_bytes[i : i + self._bytes_per_10ms] + # Only the last chunk carries the future to be resolved once fully consumed + fut = future if i + self._bytes_per_10ms >= len(audio_bytes) else None + self._chunk_queue.append((chunk, fut)) + + return future + + async def recv(self): + """Return the next audio frame for WebRTC transmission. + + Returns: + An AudioFrame containing the next audio data or silence. 
+ """ + # Compute required wait time for synchronization + if self._timestamp > 0: + wait = self._start + (self._timestamp / self._sample_rate) - time.time() + if wait > 0: + await asyncio.sleep(wait) + + if self._chunk_queue: + chunk, future = self._chunk_queue.popleft() + if future and not future.done(): + future.set_result(True) + else: + chunk = bytes(self._bytes_per_10ms) # silence + + # Convert the byte data to an ndarray of int16 samples + samples = np.frombuffer(chunk, dtype=np.int16) + + # Create AudioFrame + frame = AudioFrame.from_ndarray(samples[None, :], layout="mono") + frame.sample_rate = self._sample_rate + frame.pts = self._timestamp + frame.time_base = fractions.Fraction(1, self._sample_rate) + self._timestamp += self._samples_per_10ms + return frame + + +class RawVideoTrack(VideoStreamTrack): + """Custom video stream track for WebRTC output. + + Handles video frame queuing and conversion for WebRTC transmission. + """ + + def __init__(self, width, height): + """Initialize the raw video track. + + Args: + width: Video frame width in pixels. + height: Video frame height in pixels. + """ + super().__init__() + self._width = width + self._height = height + self._video_buffer = asyncio.Queue() + + def add_video_frame(self, frame): + """Add a video frame to the transmission buffer. + + Args: + frame: The video frame to queue for transmission. + """ + self._video_buffer.put_nowait(frame) + + async def recv(self): + """Return the next video frame for WebRTC transmission. + + Returns: + A VideoFrame ready for WebRTC transmission. 
+ """ + raw_frame = await self._video_buffer.get() + + # Convert bytes to NumPy array + frame_data = np.frombuffer(raw_frame.image, dtype=np.uint8).reshape( + (self._height, self._width, 3) + ) + + frame = VideoFrame.from_ndarray(frame_data, format="rgb24") + + # Assign timestamp + frame.pts, frame.time_base = await self.next_timestamp() + + return frame + + +class SmallWebRTCClient: + """WebRTC client implementation for handling connections and media streams. + + Manages WebRTC peer connections, audio/video streaming, and application + messaging through the SmallWebRTCConnection interface. + """ + + FORMAT_CONVERSIONS = { + "yuv420p": cv2.COLOR_YUV2RGB_I420, + "yuvj420p": cv2.COLOR_YUV2RGB_I420, # OpenCV treats both the same + "nv12": cv2.COLOR_YUV2RGB_NV12, + "gray": cv2.COLOR_GRAY2RGB, + } + + def __init__(self, webrtc_connection: SmallWebRTCConnection, callbacks: SmallWebRTCCallbacks): + """Initialize the WebRTC client. + + Args: + webrtc_connection: The underlying WebRTC connection handler. + callbacks: Event callbacks for connection and message handling. + """ + self._webrtc_connection = webrtc_connection + self._closing = False + self._callbacks = callbacks + + self._audio_output_track = None + self._video_output_track = None + self._audio_input_track: Optional[AudioStreamTrack] = None + self._video_input_track: Optional[VideoStreamTrack] = None + self._screen_video_track: Optional[VideoStreamTrack] = None + + self._params = None + self._audio_in_channels = None + self._in_sample_rate = None + self._out_sample_rate = None + self._leave_counter = 0 + + # We are always resampling it for 16000 if the sample_rate that we receive is bigger than that. 
+ # otherwise we face issues with Silero VAD + self._pipecat_resampler = AudioResampler("s16", "mono", 16000) + + @self._webrtc_connection.event_handler("connected") + async def on_connected(connection: SmallWebRTCConnection): + logger.debug("Peer connection established.") + await self._handle_client_connected() + + @self._webrtc_connection.event_handler("disconnected") + async def on_disconnected(connection: SmallWebRTCConnection): + logger.debug("Peer connection lost.") + await self._handle_peer_disconnected() + + @self._webrtc_connection.event_handler("closed") + async def on_closed(connection: SmallWebRTCConnection): + logger.debug("Client connection closed.") + await self._handle_client_closed() + + @self._webrtc_connection.event_handler("app-message") + async def on_app_message(connection: SmallWebRTCConnection, message: Any): + await self._handle_app_message(message) + + def _convert_frame(self, frame_array: np.ndarray, format_name: str) -> np.ndarray: + """Convert a video frame to RGB format based on the input format. + + Args: + frame_array: The input frame as a NumPy array. + format_name: The format of the input frame. + + Returns: + The converted RGB frame as a NumPy array. + + Raises: + ValueError: If the format is unsupported. + """ + if format_name.startswith("rgb"): # Already in RGB, no conversion needed + return frame_array + + conversion_code = SmallWebRTCClient.FORMAT_CONVERSIONS.get(format_name) + + if conversion_code is None: + raise ValueError(f"Unsupported format: {format_name}") + + return cv2.cvtColor(frame_array, conversion_code) + + async def read_video_frame(self, video_source: str): + """Read video frames from the WebRTC connection. + + Reads a video frame from the given MediaStreamTrack, converts it to RGB, + and creates a UserImageRawFrame. + + Args: + video_source: Video source to capture ("camera" or "screenVideo"). + + Yields: + UserImageRawFrame objects containing video data from the peer. 
+ """ + while True: + video_track = ( + self._video_input_track + if video_source == CAM_VIDEO_SOURCE + else self._screen_video_track + ) + if video_track is None: + await asyncio.sleep(0.01) + continue + + try: + frame = await asyncio.wait_for(video_track.recv(), timeout=2.0) + except asyncio.TimeoutError: + if self._webrtc_connection.is_connected(): + logger.warning("Timeout: No video frame received within the specified time.") + # self._webrtc_connection.ask_to_renegotiate() + frame = None + except MediaStreamError: + logger.warning("Received an unexpected media stream error while reading the video.") + frame = None + + if frame is None or not isinstance(frame, VideoFrame): + # If no valid frame, sleep for a bit + await asyncio.sleep(0.01) + continue + + format_name = frame.format.name + # Convert frame to NumPy array in its native format + frame_array = frame.to_ndarray(format=format_name) + frame_rgb = self._convert_frame(frame_array, format_name) + + image_frame = UserImageRawFrame( + user_id=self._webrtc_connection.pc_id, + image=frame_rgb.tobytes(), + size=(frame.width, frame.height), + format="RGB", + ) + image_frame.transport_source = video_source + + yield image_frame + + async def read_audio_frame(self): + """Read audio frames from the WebRTC connection. + + Reads 20ms of audio from the given MediaStreamTrack and creates an InputAudioRawFrame. + + Yields: + InputAudioRawFrame objects containing audio data from the peer. 
+ """ + while True: + if self._audio_input_track is None: + await asyncio.sleep(0.01) + continue + + try: + frame = await asyncio.wait_for(self._audio_input_track.recv(), timeout=2.0) + except asyncio.TimeoutError: + if self._webrtc_connection.is_connected(): + logger.warning("Timeout: No audio frame received within the specified time.") + frame = None + except MediaStreamError: + logger.warning("Received an unexpected media stream error while reading the audio.") + frame = None + + if frame is None or not isinstance(frame, AudioFrame): + # If we don't read any audio let's sleep for a little bit (i.e. busy wait). + await asyncio.sleep(0.01) + continue + + if frame.sample_rate > self._in_sample_rate: + resampled_frames = self._pipecat_resampler.resample(frame) + for resampled_frame in resampled_frames: + # 16-bit PCM bytes + pcm_bytes = resampled_frame.to_ndarray().astype(np.int16).tobytes() + audio_frame = InputAudioRawFrame( + audio=pcm_bytes, + sample_rate=resampled_frame.sample_rate, + num_channels=self._audio_in_channels, + ) + yield audio_frame + else: + # 16-bit PCM bytes + pcm_bytes = frame.to_ndarray().astype(np.int16).tobytes() + audio_frame = InputAudioRawFrame( + audio=pcm_bytes, + sample_rate=frame.sample_rate, + num_channels=self._audio_in_channels, + ) + yield audio_frame + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the WebRTC connection. + + Args: + frame: The audio frame to transmit. + """ + if self._can_send() and self._audio_output_track: + await self._audio_output_track.add_audio_bytes(frame.audio) + + async def write_video_frame(self, frame: OutputImageRawFrame): + """Write a video frame to the WebRTC connection. + + Args: + frame: The video frame to transmit. + """ + if self._can_send() and self._video_output_track: + self._video_output_track.add_video_frame(frame) + + async def setup(self, _params: TransportParams, frame): + """Set up the client with transport parameters. 
+ + Args: + _params: Transport configuration parameters. + frame: The initialization frame containing setup data. + """ + self._audio_in_channels = _params.audio_in_channels + self._in_sample_rate = _params.audio_in_sample_rate or frame.audio_in_sample_rate + self._out_sample_rate = _params.audio_out_sample_rate or frame.audio_out_sample_rate + self._params = _params + self._leave_counter += 1 + + async def connect(self): + """Establish the WebRTC connection.""" + if self._webrtc_connection.is_connected(): + # already initialized + return + + logger.info(f"Connecting to Small WebRTC") + await self._webrtc_connection.connect() + + async def disconnect(self): + """Disconnect from the WebRTC peer.""" + self._leave_counter -= 1 + if self._leave_counter > 0: + return + + if self.is_connected and not self.is_closing: + logger.info(f"Disconnecting to Small WebRTC") + self._closing = True + await self._webrtc_connection.disconnect() + await self._handle_peer_disconnected() + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send an application message through the WebRTC connection. + + Args: + frame: The message frame to send. 
+ """ + if self._can_send(): + self._webrtc_connection.send_app_message(frame.message) + + async def _handle_client_connected(self): + """Handle client connection establishment.""" + # There is nothing to do here yet, the pipeline is still not ready + if not self._params: + return + + self._audio_input_track = self._webrtc_connection.audio_input_track() + self._video_input_track = self._webrtc_connection.video_input_track() + self._screen_video_track = self._webrtc_connection.screen_video_input_track() + if self._params.audio_out_enabled: + self._audio_output_track = RawAudioTrack(sample_rate=self._out_sample_rate) + self._webrtc_connection.replace_audio_track(self._audio_output_track) + + if self._params.video_out_enabled: + self._video_output_track = RawVideoTrack( + width=self._params.video_out_width, height=self._params.video_out_height + ) + self._webrtc_connection.replace_video_track(self._video_output_track) + + await self._callbacks.on_client_connected(self._webrtc_connection) + + async def _handle_peer_disconnected(self): + """Handle peer disconnection cleanup.""" + self._audio_input_track = None + self._video_input_track = None + self._screen_video_track = None + self._audio_output_track = None + self._video_output_track = None + + async def _handle_client_closed(self): + """Handle client connection closure.""" + self._audio_input_track = None + self._video_input_track = None + self._screen_video_track = None + self._audio_output_track = None + self._video_output_track = None + await self._callbacks.on_client_disconnected(self._webrtc_connection) + + async def _handle_app_message(self, message: Any): + """Handle incoming application messages.""" + await self._callbacks.on_app_message(message) + + def _can_send(self): + """Check if the connection is ready for sending data.""" + return self.is_connected and not self.is_closing + + @property + def is_connected(self) -> bool: + """Check if the WebRTC connection is established. 
+ + Returns: + True if connected to the peer. + """ + return self._webrtc_connection.is_connected() + + @property + def is_closing(self) -> bool: + """Check if the connection is in the process of closing. + + Returns: + True if the connection is closing. + """ + return self._closing + + +class SmallWebRTCInputTransport(BaseInputTransport): + """Input transport implementation for SmallWebRTC. + + Handles incoming audio and video streams from WebRTC peers, + including user image requests and application message handling. + """ + + def __init__( + self, + client: SmallWebRTCClient, + params: TransportParams, + **kwargs, + ): + """Initialize the WebRTC input transport. + + Args: + client: The WebRTC client instance. + params: Transport configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + self._client = client + self._params = params + self._receive_audio_task = None + self._receive_video_task = None + self._receive_screen_video_task = None + self._image_requests = {} + + # Whether we have seen a StartFrame already. + self._initialized = False + + async def process_frame(self, frame: Frame, direction: FrameDirection): + """Process incoming frames including user image requests. + + Args: + frame: The frame to process. + direction: The direction of frame flow in the pipeline. + """ + await super().process_frame(frame, direction) + + if isinstance(frame, UserImageRequestFrame): + await self.request_participant_image(frame) + + async def start(self, frame: StartFrame): + """Start the input transport and establish WebRTC connection. + + Args: + frame: The start frame containing initialization parameters. 
+ """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.setup(self._params, frame) + await self._client.connect() + await self.set_transport_ready(frame) + if not self._receive_audio_task and self._params.audio_in_enabled: + self._receive_audio_task = self.create_task(self._receive_audio()) + if not self._receive_video_task and self._params.video_in_enabled: + self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) + + async def _stop_tasks(self): + """Stop all background tasks.""" + if self._receive_audio_task: + await self.cancel_task(self._receive_audio_task) + self._receive_audio_task = None + if self._receive_video_task: + await self.cancel_task(self._receive_video_task) + self._receive_video_task = None + if self._receive_screen_video_task: + await self.cancel_task(self._receive_screen_video_task) + self._receive_screen_video_task = None + + async def stop(self, frame: EndFrame): + """Stop the input transport and disconnect from WebRTC. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._stop_tasks() + await self._client.disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the input transport and disconnect immediately. + + Args: + frame: The cancel frame signaling immediate cancellation. + """ + await super().cancel(frame) + await self._stop_tasks() + await self._client.disconnect() + + async def _receive_audio(self): + """Background task for receiving audio frames from WebRTC.""" + try: + audio_iterator = self._client.read_audio_frame() + async for audio_frame in audio_iterator: + if audio_frame: + await self.push_audio_frame(audio_frame) + + except Exception as e: + logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") + + async def _receive_video(self, video_source: str): + """Background task for receiving video frames from WebRTC. + + Args: + video_source: Video source to capture ("camera" or "screenVideo"). 
+ """ + try: + video_iterator = self._client.read_video_frame(video_source) + async for video_frame in video_iterator: + if video_frame: + await self.push_video_frame(video_frame) + + # Check if there are any pending image requests and create UserImageRawFrame + if self._image_requests: + for req_id, request_frame in list(self._image_requests.items()): + if request_frame.video_source == video_source: + # Create UserImageRawFrame using the current video frame + image_frame = UserImageRawFrame( + user_id=request_frame.user_id, + request=request_frame, + image=video_frame.image, + size=video_frame.size, + format=video_frame.format, + ) + image_frame.transport_source = video_source + # Push the frame to the pipeline + await self.push_video_frame(image_frame) + # Remove from pending requests + del self._image_requests[req_id] + + except Exception as e: + logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") + + async def push_app_message(self, message: Any): + """Push an application message into the pipeline. + + Args: + message: The application message to process. + """ + logger.debug(f"Received app message inside SmallWebRTCInputTransport {message}") + frame = TransportMessageUrgentFrame(message=message) + await self.push_frame(frame) + + # Add this method similar to DailyInputTransport.request_participant_image + async def request_participant_image(self, frame: UserImageRequestFrame): + """Request an image frame from the participant's video stream. + + When a UserImageRequestFrame is received, this method will store the request + and the next video frame received will be converted to a UserImageRawFrame. + + Args: + frame: The user image request frame. 
+ """ + logger.debug(f"Requesting image from participant: {frame.user_id}") + + # Store the request + request_id = f"{frame.function_name}:{frame.tool_call_id}" + self._image_requests[request_id] = frame + + # Default to camera if no source specified + if frame.video_source is None: + frame.video_source = CAM_VIDEO_SOURCE + # If we're not already receiving video, try to get a frame now + if ( + frame.video_source == CAM_VIDEO_SOURCE + and not self._receive_video_task + and self._params.video_in_enabled + ): + # Start video reception if it's not already running + self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) + elif ( + frame.video_source == SCREEN_VIDEO_SOURCE + and not self._receive_screen_video_task + and self._params.video_in_enabled + ): + # Start screen video reception if it's not already running + self._receive_screen_video_task = self.create_task( + self._receive_video(SCREEN_VIDEO_SOURCE) + ) + + async def capture_participant_media( + self, + source: str = CAM_VIDEO_SOURCE, + ): + """Capture media from a specific participant. + + Args: + source: Media source to capture from. 
("camera", "microphone", or "screenVideo") + """ + # If we're not already receiving video, try to get a frame now + if ( + source == MIC_AUDIO_SOURCE + and not self._receive_audio_task + and self._params.audio_in_enabled + ): + # Start audio reception if it's not already running + self._receive_audio_task = self.create_task(self._receive_audio()) + elif ( + source == CAM_VIDEO_SOURCE + and not self._receive_video_task + and self._params.video_in_enabled + ): + # Start video reception if it's not already running + self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE)) + elif ( + source == SCREEN_VIDEO_SOURCE + and not self._receive_screen_video_task + and self._params.video_in_enabled + ): + # Start screen video reception if it's not already running + self._receive_screen_video_task = self.create_task( + self._receive_video(SCREEN_VIDEO_SOURCE) + ) + + +class SmallWebRTCOutputTransport(BaseOutputTransport): + """Output transport implementation for SmallWebRTC. + + Handles outgoing audio and video streams to WebRTC peers, + including transport message sending. + """ + + def __init__( + self, + client: SmallWebRTCClient, + params: TransportParams, + **kwargs, + ): + """Initialize the WebRTC output transport. + + Args: + client: The WebRTC client instance. + params: Transport configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + self._client = client + self._params = params + + # Whether we have seen a StartFrame already. + self._initialized = False + + async def start(self, frame: StartFrame): + """Start the output transport and establish WebRTC connection. + + Args: + frame: The start frame containing initialization parameters. 
+ """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.setup(self._params, frame) + await self._client.connect() + await self.set_transport_ready(frame) + + async def stop(self, frame: EndFrame): + """Stop the output transport and disconnect from WebRTC. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._client.disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the output transport and disconnect immediately. + + Args: + frame: The cancel frame signaling immediate cancellation. + """ + await super().cancel(frame) + await self._client.disconnect() + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send a transport message through the WebRTC connection. + + Args: + frame: The transport message frame to send. + """ + await self._client.send_message(frame) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the WebRTC connection. + + Args: + frame: The output audio frame to transmit. + """ + await self._client.write_audio_frame(frame) + + async def write_video_frame(self, frame: OutputImageRawFrame): + """Write a video frame to the WebRTC connection. + + Args: + frame: The output video frame to transmit. + """ + await self._client.write_video_frame(frame) + + +class SmallWebRTCTransport(BaseTransport): + """WebRTC transport implementation for real-time communication. + + Provides bidirectional audio and video streaming over WebRTC connections + with support for application messaging and connection event handling. + """ + + def __init__( + self, + webrtc_connection: SmallWebRTCConnection, + params: TransportParams, + input_name: Optional[str] = None, + output_name: Optional[str] = None, + ): + """Initialize the WebRTC transport. + + Args: + webrtc_connection: The underlying WebRTC connection handler. 
+ params: Transport configuration parameters. + input_name: Optional name for the input processor. + output_name: Optional name for the output processor. + """ + super().__init__(input_name=input_name, output_name=output_name) + self._params = params + + self._callbacks = SmallWebRTCCallbacks( + on_app_message=self._on_app_message, + on_client_connected=self._on_client_connected, + on_client_disconnected=self._on_client_disconnected, + ) + + self._client = SmallWebRTCClient(webrtc_connection, self._callbacks) + + self._input: Optional[SmallWebRTCInputTransport] = None + self._output: Optional[SmallWebRTCOutputTransport] = None + + # Register supported handlers. The user will only be able to register + # these handlers. + self._register_event_handler("on_app_message") + self._register_event_handler("on_client_connected") + self._register_event_handler("on_client_disconnected") + + def input(self) -> SmallWebRTCInputTransport: + """Get the input transport processor. + + Returns: + The input transport for handling incoming media streams. + """ + if not self._input: + self._input = SmallWebRTCInputTransport( + self._client, self._params, name=self._input_name + ) + return self._input + + def output(self) -> SmallWebRTCOutputTransport: + """Get the output transport processor. + + Returns: + The output transport for handling outgoing media streams. + """ + if not self._output: + self._output = SmallWebRTCOutputTransport( + self._client, self._params, name=self._output_name + ) + return self._output + + async def send_image(self, frame: OutputImageRawFrame | SpriteFrame): + """Send an image frame through the transport. + + Args: + frame: The image frame to send. + """ + if self._output: + await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) + + async def send_audio(self, frame: OutputAudioRawFrame): + """Send an audio frame through the transport. + + Args: + frame: The audio frame to send. 
+ """ + if self._output: + await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) + + async def _on_app_message(self, message: Any): + """Handle incoming application messages.""" + if self._input: + await self._input.push_app_message(message) + await self._call_event_handler("on_app_message", message) + + async def _on_client_connected(self, webrtc_connection): + """Handle client connection events.""" + await self._call_event_handler("on_client_connected", webrtc_connection) + + async def _on_client_disconnected(self, webrtc_connection): + """Handle client disconnection events.""" + await self._call_event_handler("on_client_disconnected", webrtc_connection) + + async def capture_participant_video( + self, + video_source: str = CAM_VIDEO_SOURCE, + ): + """Capture video from a specific participant. + + Args: + video_source: Video source to capture from ("camera" or "screenVideo"). + """ + if self._input: + await self._input.capture_participant_media(source=video_source) + + async def capture_participant_audio( + self, + audio_source: str = MIC_AUDIO_SOURCE, + ): + """Capture audio from a specific participant. + + Args: + audio_source: Audio source to capture from. (currently, "microphone" is the only supported option) + """ + if self._input: + await self._input.capture_participant_media(source=audio_source) diff --git a/src/pipecat/transports/tavus/__init__.py b/src/pipecat/transports/tavus/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py new file mode 100644 index 000000000..aea30f72f --- /dev/null +++ b/src/pipecat/transports/tavus/transport.py @@ -0,0 +1,770 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Tavus transport implementation for Pipecat. + +This module provides integration with the Tavus platform for creating conversational +AI applications with avatars. 
It manages conversation sessions and provides real-time +audio/video streaming capabilities through the Tavus API. +""" + +import os +from functools import partial +from typing import Any, Awaitable, Callable, Mapping, Optional + +import aiohttp +from daily.daily import AudioData +from loguru import logger +from pydantic import BaseModel + +from pipecat.frames.frames import ( + CancelFrame, + EndFrame, + Frame, + InputAudioRawFrame, + OutputAudioRawFrame, + StartFrame, + StartInterruptionFrame, + TransportMessageFrame, + TransportMessageUrgentFrame, +) +from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup +from pipecat.transports.base_input import BaseInputTransport +from pipecat.transports.base_output import BaseOutputTransport +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import ( + DailyCallbacks, + DailyParams, + DailyTransportClient, +) + + +class TavusApi: + """Helper class for interacting with the Tavus API (v2). + + Provides methods for creating and managing conversations with Tavus avatars, + including conversation lifecycle management and persona information retrieval. + """ + + BASE_URL = "https://tavusapi.com/v2" + MOCK_CONVERSATION_ID = "dev-conversation" + MOCK_PERSONA_NAME = "TestTavusTransport" + + def __init__(self, api_key: str, session: aiohttp.ClientSession): + """Initialize the TavusApi client. + + Args: + api_key: Tavus API key for authentication. + session: An aiohttp session for making HTTP requests. + """ + self._api_key = api_key + self._session = session + self._headers = {"Content-Type": "application/json", "x-api-key": self._api_key} + # Only for development + self._dev_room_url = os.getenv("TAVUS_SAMPLE_ROOM_URL") + + async def create_conversation(self, replica_id: str, persona_id: str) -> dict: + """Create a new conversation with the specified replica and persona. 
+ + Args: + replica_id: ID of the replica to use in the conversation. + persona_id: ID of the persona to use in the conversation. + + Returns: + Dictionary containing conversation_id and conversation_url. + """ + if self._dev_room_url: + return { + "conversation_id": self.MOCK_CONVERSATION_ID, + "conversation_url": self._dev_room_url, + } + + logger.debug(f"Creating Tavus conversation: replica={replica_id}, persona={persona_id}") + url = f"{self.BASE_URL}/conversations" + payload = { + "replica_id": replica_id, + "persona_id": persona_id, + } + async with self._session.post(url, headers=self._headers, json=payload) as r: + r.raise_for_status() + response = await r.json() + logger.debug(f"Created Tavus conversation: {response}") + return response + + async def end_conversation(self, conversation_id: str): + """End an existing conversation. + + Args: + conversation_id: ID of the conversation to end. + """ + if conversation_id is None or conversation_id == self.MOCK_CONVERSATION_ID: + return + + url = f"{self.BASE_URL}/conversations/{conversation_id}/end" + async with self._session.post(url, headers=self._headers) as r: + r.raise_for_status() + logger.debug(f"Ended Tavus conversation {conversation_id}") + + async def get_persona_name(self, persona_id: str) -> str: + """Get the name of a persona by ID. + + Args: + persona_id: ID of the persona to retrieve. + + Returns: + The name of the persona. + """ + if self._dev_room_url is not None: + return self.MOCK_PERSONA_NAME + + url = f"{self.BASE_URL}/personas/{persona_id}" + async with self._session.get(url, headers=self._headers) as r: + r.raise_for_status() + response = await r.json() + logger.debug(f"Fetched Tavus persona: {response}") + return response["persona_name"] + + +class TavusCallbacks(BaseModel): + """Callback handlers for Tavus events. + + Parameters: + on_participant_joined: Called when a participant joins the conversation. + on_participant_left: Called when a participant leaves the conversation. 
+ """ + + on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] + on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] + + +class TavusParams(DailyParams): + """Configuration parameters for the Tavus transport. + + Parameters: + audio_in_enabled: Whether to enable audio input from participants. + audio_out_enabled: Whether to enable audio output to participants. + microphone_out_enabled: Whether to enable microphone output track. + """ + + audio_in_enabled: bool = True + audio_out_enabled: bool = True + microphone_out_enabled: bool = False + + +class TavusTransportClient: + """Transport client that integrates Pipecat with the Tavus platform. + + A transport client that integrates a Pipecat Bot with the Tavus platform by managing + conversation sessions using the Tavus API. + + This client uses `TavusApi` to interact with the Tavus backend services. When a conversation + is started via `TavusApi`, Tavus provides a `roomURL` that can be used to connect the Pipecat Bot + into the same virtual room where the TavusBot is operating. + """ + + def __init__( + self, + *, + bot_name: str, + params: TavusParams = TavusParams(), + callbacks: TavusCallbacks, + api_key: str, + replica_id: str, + persona_id: str = "pipecat-stream", + session: aiohttp.ClientSession, + ) -> None: + """Initialize the Tavus transport client. + + Args: + bot_name: The name of the Pipecat bot instance. + params: Optional parameters for Tavus operation. + callbacks: Callback handlers for Tavus-related events. + api_key: API key for authenticating with Tavus API. + replica_id: ID of the replica to use in the Tavus conversation. + persona_id: ID of the Tavus persona. Defaults to "pipecat-stream", + which signals Tavus to use the TTS voice of the Pipecat bot + instead of a Tavus persona voice. + session: The aiohttp session for making async HTTP requests. 
+ """ + self._bot_name = bot_name + self._api = TavusApi(api_key, session) + self._replica_id = replica_id + self._persona_id = persona_id + self._conversation_id: Optional[str] = None + self._client: Optional[DailyTransportClient] = None + self._callbacks = callbacks + self._params = params + + async def _initialize(self) -> str: + """Initialize the conversation and return the room URL.""" + response = await self._api.create_conversation(self._replica_id, self._persona_id) + self._conversation_id = response["conversation_id"] + return response["conversation_url"] + + async def setup(self, setup: FrameProcessorSetup): + """Setup the client and initialize the conversation. + + Args: + setup: The frame processor setup configuration. + """ + if self._conversation_id is not None: + logger.debug(f"Conversation ID already defined: {self._conversation_id}") + return + try: + room_url = await self._initialize() + daily_callbacks = DailyCallbacks( + on_active_speaker_changed=partial( + self._on_handle_callback, "on_active_speaker_changed" + ), + on_joined=self._on_joined, + on_left=self._on_left, + on_error=partial(self._on_handle_callback, "on_error"), + on_app_message=partial(self._on_handle_callback, "on_app_message"), + on_call_state_updated=partial(self._on_handle_callback, "on_call_state_updated"), + on_client_connected=partial(self._on_handle_callback, "on_client_connected"), + on_client_disconnected=partial(self._on_handle_callback, "on_client_disconnected"), + on_dialin_connected=partial(self._on_handle_callback, "on_dialin_connected"), + on_dialin_ready=partial(self._on_handle_callback, "on_dialin_ready"), + on_dialin_stopped=partial(self._on_handle_callback, "on_dialin_stopped"), + on_dialin_error=partial(self._on_handle_callback, "on_dialin_error"), + on_dialin_warning=partial(self._on_handle_callback, "on_dialin_warning"), + on_dialout_answered=partial(self._on_handle_callback, "on_dialout_answered"), + on_dialout_connected=partial(self._on_handle_callback, 
"on_dialout_connected"), + on_dialout_stopped=partial(self._on_handle_callback, "on_dialout_stopped"), + on_dialout_error=partial(self._on_handle_callback, "on_dialout_error"), + on_dialout_warning=partial(self._on_handle_callback, "on_dialout_warning"), + on_participant_joined=self._callbacks.on_participant_joined, + on_participant_left=self._callbacks.on_participant_left, + on_participant_updated=partial(self._on_handle_callback, "on_participant_updated"), + on_transcription_message=partial( + self._on_handle_callback, "on_transcription_message" + ), + on_recording_started=partial(self._on_handle_callback, "on_recording_started"), + on_recording_stopped=partial(self._on_handle_callback, "on_recording_stopped"), + on_recording_error=partial(self._on_handle_callback, "on_recording_error"), + on_transcription_stopped=partial( + self._on_handle_callback, "on_transcription_stopped" + ), + on_transcription_error=partial(self._on_handle_callback, "on_transcription_error"), + ) + self._client = DailyTransportClient( + room_url, None, "Pipecat", self._params, daily_callbacks, self._bot_name + ) + await self._client.setup(setup) + except Exception as e: + logger.error(f"Failed to setup TavusTransportClient: {e}") + await self._api.end_conversation(self._conversation_id) + self._conversation_id = None + + async def cleanup(self): + """Cleanup client resources.""" + try: + await self._client.cleanup() + except Exception as e: + logger.exception(f"Exception during cleanup: {e}") + + async def _on_joined(self, data): + """Handle joined event.""" + logger.debug("TavusTransportClient joined!") + + async def _on_left(self): + """Handle left event.""" + logger.debug("TavusTransportClient left!") + + async def _on_handle_callback(self, event_name, *args, **kwargs): + """Handle generic callback events.""" + logger.trace(f"[Callback] {event_name} called with args={args}, kwargs={kwargs}") + + async def get_persona_name(self) -> str: + """Get the persona name from the API. 
async def stop(self):
    """Leave the room and end the Tavus conversation.

    Each step is skipped when there is nothing to act on:

    * ``self._client`` is ``None`` when ``setup()`` failed or never ran, so
      there is no room to leave.
    * ``self._conversation_id`` is ``None`` once the conversation has been
      ended (or was never created). Both the input and the output transport
      call ``stop()`` on the same client, so the second call must not ask
      the API to end a conversation with no ID.
    """
    if self._client is not None:
        await self._client.leave()

    if self._conversation_id is not None:
        await self._api.end_conversation(self._conversation_id)
        # Forget the ID only after the API call succeeded.
        self._conversation_id = None
async def update_subscriptions(self, participant_settings=None, profile_settings=None):
    """Forward subscription changes to the Daily client.

    Does nothing while no Daily client exists.

    Args:
        participant_settings: Per-participant subscription settings.
        profile_settings: Global subscription profile settings.
    """
    client = self._client
    if client:
        await client.update_subscriptions(
            participant_settings=participant_settings, profile_settings=profile_settings
        )
async def start(self, frame: StartFrame):
    """Start the Tavus input transport.

    The parent class is started on every call. The Tavus client is started
    and the transport marked ready only the first time a ``StartFrame`` is
    seen.

    Args:
        frame: The start frame containing initialization parameters.
    """
    await super().start(frame)

    first_start = not self._initialized
    if first_start:
        self._initialized = True
        await self._client.start(frame)
        await self.set_transport_ready(frame)
async def _on_participant_audio_data(
    self, participant_id: str, audio: AudioData, audio_source: str
):
    """Wrap captured participant audio in a frame and push it into the pipeline.

    Args:
        participant_id: ID of the participant the audio came from (unused here).
        audio: The captured audio; its ``audio_frames``, ``sample_rate`` and
            ``num_channels`` are copied onto the frame.
        audio_source: Name of the audio source, stored as the frame's
            ``transport_source``.
    """
    frame = InputAudioRawFrame(
        audio=audio.audio_frames,
        # The rate comes from the audio's own metadata. Passing
        # `audio.audio_frames` here would hand the raw sample payload to
        # the frame as its sample rate.
        sample_rate=audio.sample_rate,
        num_channels=audio.num_channels,
    )
    frame.transport_source = audio_source
    await self.push_audio_frame(frame)
async def process_frame(self, frame: Frame, direction: FrameDirection):
    """Process a frame, notifying Tavus when an interruption starts.

    The frame is always handed to the parent class first. Only a
    ``StartInterruptionFrame`` triggers the extra interruption handling.

    Args:
        frame: The frame to process.
        direction: The direction of frame flow in the pipeline.
    """
    await super().process_frame(frame, direction)
    if not isinstance(frame, StartInterruptionFrame):
        return
    await self._handle_interruptions()
def __init__(
    self,
    bot_name: str,
    session: aiohttp.ClientSession,
    api_key: str,
    replica_id: str,
    persona_id: str = "pipecat-stream",
    params: Optional[TavusParams] = None,
    input_name: Optional[str] = None,
    output_name: Optional[str] = None,
):
    """Initialize the Tavus transport.

    Args:
        bot_name: The name of the Pipecat bot. Forwarded to the Tavus
            transport client.
        session: aiohttp session used for async HTTP requests.
        api_key: Tavus API key for authentication.
        replica_id: ID of the replica model used for voice generation.
        persona_id: ID of the Tavus persona. Defaults to "pipecat-stream"
            to use the Pipecat TTS voice.
        params: Optional Tavus-specific configuration parameters. A fresh
            ``TavusParams()`` is created when omitted.
        input_name: Optional name for the input transport.
        output_name: Optional name for the output transport.
    """
    super().__init__(input_name=input_name, output_name=output_name)
    # Build the defaults per instance. A `TavusParams()` default in the
    # signature is evaluated once and would be shared by every transport
    # created without explicit params.
    self._params = params if params is not None else TavusParams()

    callbacks = TavusCallbacks(
        on_participant_joined=self._on_participant_joined,
        on_participant_left=self._on_participant_left,
    )
    self._client = TavusTransportClient(
        # Pass the caller's name through instead of a hard-coded "Pipecat",
        # so the documented `bot_name` argument is actually used.
        bot_name=bot_name,
        callbacks=callbacks,
        api_key=api_key,
        replica_id=replica_id,
        persona_id=persona_id,
        session=session,
        params=self._params,
    )
    self._input: Optional[TavusInputTransport] = None
    self._output: Optional[TavusOutputTransport] = None
    self._tavus_participant_id = None

    # Register supported handlers. The user will only be able to register
    # these handlers.
    self._register_event_handler("on_client_connected")
    self._register_event_handler("on_client_disconnected")
def input(self) -> FrameProcessor:
    """Return the Tavus input transport, creating it on first use.

    The instance is built from this transport's client and params and then
    reused on later calls.

    Returns:
        The Tavus input transport instance.
    """
    if self._input:
        return self._input
    self._input = TavusInputTransport(client=self._client, params=self._params)
    return self._input
class WebsocketClientCallbacks(BaseModel):
    """Callback functions for WebSocket client events.

    Each callback is an async callable that receives the WebSocket
    connection as its first argument; ``on_message`` additionally receives
    the message payload.

    Parameters:
        on_connected: Called when WebSocket connection is established.
        on_disconnected: Called when WebSocket connection is closed.
        on_message: Called when a message is received from the WebSocket.
    """

    # NOTE(review): the session connects through `websockets.asyncio.client.connect`;
    # confirm the object passed to these callbacks really is a
    # `websockets.WebSocketClientProtocol` as annotated here.
    on_connected: Callable[[websockets.WebSocketClientProtocol], Awaitable[None]]
    on_disconnected: Callable[[websockets.WebSocketClientProtocol], Awaitable[None]]
    # Second argument is the raw message as received from the connection.
    on_message: Callable[[websockets.WebSocketClientProtocol, websockets.Data], Awaitable[None]]
@property
def task_manager(self) -> BaseTaskManager:
    """Return the task manager given to this session in ``setup()``.

    Returns:
        The task manager instance.

    Raises:
        Exception: If no task manager has been set yet.
    """
    if self._task_manager:
        return self._task_manager
    raise Exception(
        f"{self._transport_name}::WebsocketClientSession: TaskManager not initialized (pipeline not started?)"
    )
async def disconnect(self):
    """Drop one user of the session and close the socket after the last one.

    The leave counter is decremented on every call. The receive task is
    cancelled and the WebSocket closed only when a connection exists and
    the counter is no longer positive.
    """
    self._leave_counter -= 1
    other_users_remain = self._leave_counter > 0
    if self._websocket and not other_users_remain:
        # Stop the reader before closing the connection it iterates over.
        await self.task_manager.cancel_task(self._client_task)
        await self._websocket.close()
        self._websocket = None
async def on_message(self, websocket, message):
    """Deserialize an incoming WebSocket message and push the resulting frame.

    Messages are dropped when no serializer is configured or when the
    serializer produces no frame. Input audio frames go through the audio
    path when audio input is enabled; every other frame is pushed as-is.

    Args:
        websocket: The WebSocket connection that received the message.
        message: The received message data.
    """
    serializer = self._params.serializer
    if not serializer:
        return

    frame = await serializer.deserialize(message)
    if not frame:
        return

    is_input_audio = isinstance(frame, InputAudioRawFrame) and self._params.audio_in_enabled
    push = self.push_audio_frame if is_input_audio else self.push_frame
    await push(frame)
+ + Args: + frame: The start frame containing initialization parameters. + """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 + if self._params.serializer: + await self._params.serializer.setup(frame) + await self._session.connect() + await self.set_transport_ready(frame) + + async def stop(self, frame: EndFrame): + """Stop the output transport and disconnect from WebSocket. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._session.disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the output transport and disconnect from WebSocket. + + Args: + frame: The cancel frame signaling immediate cancellation. + """ + await super().cancel(frame) + await self._session.disconnect() + + async def cleanup(self): + """Clean up the output transport resources.""" + await super().cleanup() + await self._transport.cleanup() + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send a transport message through the WebSocket. + + Args: + frame: The transport message frame to send. + """ + await self._write_frame(frame) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the WebSocket with optional WAV header. + + Args: + frame: The output audio frame to write. 
+ """ + frame = OutputAudioRawFrame( + audio=frame.audio, + sample_rate=self.sample_rate, + num_channels=self._params.audio_out_channels, + ) + + if self._params.add_wav_header: + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wf: + wf.setsampwidth(2) + wf.setnchannels(frame.num_channels) + wf.setframerate(frame.sample_rate) + wf.writeframes(frame.audio) + wav_frame = OutputAudioRawFrame( + buffer.getvalue(), + sample_rate=frame.sample_rate, + num_channels=frame.num_channels, + ) + frame = wav_frame + + await self._write_frame(frame) + + # Simulate audio playback with a sleep. + await self._write_audio_sleep() + + async def _write_frame(self, frame: Frame): + """Write a frame to the WebSocket after serialization.""" + if not self._params.serializer: + return + payload = await self._params.serializer.serialize(frame) + if payload: + await self._session.send(payload) + + async def _write_audio_sleep(self): + """Simulate audio playback timing with sleep delays.""" + # Simulate a clock. + current_time = time.monotonic() + sleep_duration = max(0, self._next_send_time - current_time) + await asyncio.sleep(sleep_duration) + if sleep_duration == 0: + self._next_send_time = time.monotonic() + self._send_interval + else: + self._next_send_time += self._send_interval + + +class WebsocketClientTransport(BaseTransport): + """WebSocket client transport for bidirectional communication. + + Provides a complete WebSocket client transport implementation with + input and output capabilities, connection management, and event handling. + """ + + def __init__( + self, + uri: str, + params: Optional[WebsocketClientParams] = None, + ): + """Initialize the WebSocket client transport. + + Args: + uri: The WebSocket URI to connect to. + params: Optional configuration parameters for the transport. 
def input(self) -> WebsocketClientInputTransport:
    """Return the input transport, creating it on first use.

    The instance is built with this transport, its session and its params,
    and is reused on later calls.

    Returns:
        The WebSocket client input transport instance.
    """
    if self._input:
        return self._input
    self._input = WebsocketClientInputTransport(self, self._session, self._params)
    return self._input
+ """ + if not self._output: + self._output = WebsocketClientOutputTransport(self, self._session, self._params) + return self._output + + async def _on_connected(self, websocket): + """Handle WebSocket connection established event.""" + await self._call_event_handler("on_connected", websocket) + + async def _on_disconnected(self, websocket): + """Handle WebSocket connection closed event.""" + await self._call_event_handler("on_disconnected", websocket) + + async def _on_message(self, websocket, message): + """Handle incoming WebSocket message.""" + if self._input: + await self._input.on_message(websocket, message) diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py new file mode 100644 index 000000000..8287783c2 --- /dev/null +++ b/src/pipecat/transports/websocket/fastapi.py @@ -0,0 +1,547 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""FastAPI WebSocket transport implementation for Pipecat. + +This module provides WebSocket-based transport for real-time audio/video streaming +using FastAPI and WebSocket connections. Supports binary and text serialization +with configurable session timeouts and WAV header generation. 
class FastAPIWebsocketParams(TransportParams):
    """Configuration parameters for FastAPI WebSocket transport.

    Extends ``TransportParams`` with the options specific to this transport.

    Parameters:
        add_wav_header: Whether to add WAV headers to audio frames.
        serializer: Frame serializer for encoding/decoding messages.
        session_timeout: Session timeout in seconds, None for no timeout.
    """

    # WAV headers are off unless explicitly requested.
    add_wav_header: bool = False
    # No serializer is configured by default.
    serializer: Optional[FrameSerializer] = None
    # None disables the session timeout.
    session_timeout: Optional[int] = None
+ """ + + on_client_connected: Callable[[WebSocket], Awaitable[None]] + on_client_disconnected: Callable[[WebSocket], Awaitable[None]] + on_session_timeout: Callable[[WebSocket], Awaitable[None]] + + +class FastAPIWebsocketClient: + """WebSocket client wrapper for handling connections and message passing. + + Manages WebSocket state, message sending/receiving, and connection lifecycle + with support for both binary and text message types. + """ + + def __init__(self, websocket: WebSocket, is_binary: bool, callbacks: FastAPIWebsocketCallbacks): + """Initialize the WebSocket client. + + Args: + websocket: The FastAPI WebSocket connection. + is_binary: Whether to use binary message format. + callbacks: Event callback functions. + """ + self._websocket = websocket + self._closing = False + self._is_binary = is_binary + self._callbacks = callbacks + self._leave_counter = 0 + + async def setup(self, _: StartFrame): + """Set up the WebSocket client. + + Args: + _: The start frame (unused). + """ + self._leave_counter += 1 + + def receive(self) -> typing.AsyncIterator[bytes | str]: + """Get an async iterator for receiving WebSocket messages. + + Returns: + An async iterator yielding bytes or strings based on message type. + """ + return self._websocket.iter_bytes() if self._is_binary else self._websocket.iter_text() + + async def send(self, data: str | bytes): + """Send data through the WebSocket connection. + + Args: + data: The data to send (string or bytes). 
+ """ + try: + if self._can_send(): + if self._is_binary: + await self._websocket.send_bytes(data) + else: + await self._websocket.send_text(data) + except Exception as e: + logger.error( + f"{self} exception sending data: {e.__class__.__name__} ({e}), application_state: {self._websocket.application_state}" + ) + # For some reason the websocket is disconnected, and we are not able to send data + # So let's properly handle it and disconnect the transport if it is not already disconnecting + if ( + self._websocket.application_state == WebSocketState.DISCONNECTED + and not self.is_closing + ): + logger.warning("Closing already disconnected websocket!") + self._closing = True + await self.trigger_client_disconnected() + + async def disconnect(self): + """Disconnect the WebSocket client.""" + self._leave_counter -= 1 + if self._leave_counter > 0: + return + + if self.is_connected and not self.is_closing: + self._closing = True + try: + await self._websocket.close() + except Exception as e: + logger.error(f"{self} exception while closing the websocket: {e}") + finally: + await self.trigger_client_disconnected() + + async def trigger_client_disconnected(self): + """Trigger the client disconnected callback.""" + await self._callbacks.on_client_disconnected(self._websocket) + + async def trigger_client_connected(self): + """Trigger the client connected callback.""" + await self._callbacks.on_client_connected(self._websocket) + + async def trigger_client_timeout(self): + """Trigger the client timeout callback.""" + await self._callbacks.on_session_timeout(self._websocket) + + def _can_send(self): + """Check if data can be sent through the WebSocket.""" + return self.is_connected and not self.is_closing + + @property + def is_connected(self) -> bool: + """Check if the WebSocket is currently connected. + + Returns: + True if the WebSocket is in connected state. 
+ """ + return self._websocket.client_state == WebSocketState.CONNECTED + + @property + def is_closing(self) -> bool: + """Check if the WebSocket is currently closing. + + Returns: + True if the WebSocket is in the process of closing. + """ + return self._closing + + +class FastAPIWebsocketInputTransport(BaseInputTransport): + """Input transport for FastAPI WebSocket connections. + + Handles incoming WebSocket messages, deserializes frames, and manages + connection monitoring with optional session timeouts. + """ + + def __init__( + self, + transport: BaseTransport, + client: FastAPIWebsocketClient, + params: FastAPIWebsocketParams, + **kwargs, + ): + """Initialize the WebSocket input transport. + + Args: + transport: The parent transport instance. + client: The WebSocket client wrapper. + params: Transport configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + self._transport = transport + self._client = client + self._params = params + self._receive_task = None + self._monitor_websocket_task = None + + # Whether we have seen a StartFrame already. + self._initialized = False + + async def start(self, frame: StartFrame): + """Start the input transport and begin message processing. + + Args: + frame: The start frame containing initialization parameters. 
+ """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) + if not self._monitor_websocket_task and self._params.session_timeout: + self._monitor_websocket_task = self.create_task(self._monitor_websocket()) + await self._client.trigger_client_connected() + if not self._receive_task: + self._receive_task = self.create_task(self._receive_messages()) + await self.set_transport_ready(frame) + + async def _stop_tasks(self): + """Stop all running tasks.""" + if self._monitor_websocket_task: + await self.cancel_task(self._monitor_websocket_task) + self._monitor_websocket_task = None + if self._receive_task: + await self.cancel_task(self._receive_task) + self._receive_task = None + + async def stop(self, frame: EndFrame): + """Stop the input transport and cleanup resources. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._stop_tasks() + await self._client.disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the input transport and stop all processing. + + Args: + frame: The cancel frame signaling immediate cancellation. 
+ """ + await super().cancel(frame) + await self._stop_tasks() + await self._client.disconnect() + + async def cleanup(self): + """Clean up transport resources.""" + await super().cleanup() + await self._transport.cleanup() + + async def _receive_messages(self): + """Main message receiving loop for WebSocket messages.""" + try: + async for message in self._client.receive(): + if not self._params.serializer: + continue + + frame = await self._params.serializer.deserialize(message) + + if not frame: + continue + + if isinstance(frame, InputAudioRawFrame): + await self.push_audio_frame(frame) + else: + await self.push_frame(frame) + except Exception as e: + logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})") + + await self._client.trigger_client_disconnected() + + async def _monitor_websocket(self): + """Wait for self._params.session_timeout seconds, if the websocket is still open, trigger timeout event.""" + await asyncio.sleep(self._params.session_timeout) + await self._client.trigger_client_timeout() + + +class FastAPIWebsocketOutputTransport(BaseOutputTransport): + """Output transport for FastAPI WebSocket connections. + + Handles outgoing frame serialization, audio streaming with timing simulation, + and WebSocket message transmission with optional WAV header generation. + """ + + def __init__( + self, + transport: BaseTransport, + client: FastAPIWebsocketClient, + params: FastAPIWebsocketParams, + **kwargs, + ): + """Initialize the WebSocket output transport. + + Args: + transport: The parent transport instance. + client: The WebSocket client wrapper. + params: Transport configuration parameters. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(params, **kwargs) + + self._transport = transport + self._client = client + self._params = params + + # write_audio_frame() is called quickly, as soon as we get audio + # (e.g. 
from the TTS), and since this is just a network connection we + # would be sending it to quickly. Instead, we want to block to emulate + # an audio device, this is what the send interval is. It will be + # computed on StartFrame. + self._send_interval = 0 + self._next_send_time = 0 + + # Whether we have seen a StartFrame already. + self._initialized = False + + async def start(self, frame: StartFrame): + """Start the output transport and initialize timing. + + Args: + frame: The start frame containing initialization parameters. + """ + await super().start(frame) + + if self._initialized: + return + + self._initialized = True + + await self._client.setup(frame) + if self._params.serializer: + await self._params.serializer.setup(frame) + self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2 + await self.set_transport_ready(frame) + + async def stop(self, frame: EndFrame): + """Stop the output transport and cleanup resources. + + Args: + frame: The end frame signaling transport shutdown. + """ + await super().stop(frame) + await self._write_frame(frame) + await self._client.disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the output transport and stop all processing. + + Args: + frame: The cancel frame signaling immediate cancellation. + """ + await super().cancel(frame) + await self._write_frame(frame) + await self._client.disconnect() + + async def cleanup(self): + """Clean up transport resources.""" + await super().cleanup() + await self._transport.cleanup() + + async def process_frame(self, frame: Frame, direction: FrameDirection): + """Process outgoing frames with special handling for interruptions. + + Args: + frame: The frame to process. + direction: The direction of frame flow in the pipeline. 
+ """ + await super().process_frame(frame, direction) + + if isinstance(frame, StartInterruptionFrame): + await self._write_frame(frame) + self._next_send_time = 0 + + async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame): + """Send a transport message frame. + + Args: + frame: The transport message frame to send. + """ + await self._write_frame(frame) + + async def write_audio_frame(self, frame: OutputAudioRawFrame): + """Write an audio frame to the WebSocket with timing simulation. + + Args: + frame: The output audio frame to write. + """ + if self._client.is_closing or not self._client.is_connected: + return + + frame = OutputAudioRawFrame( + audio=frame.audio, + sample_rate=self.sample_rate, + num_channels=self._params.audio_out_channels, + ) + + if self._params.add_wav_header: + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wf: + wf.setsampwidth(2) + wf.setnchannels(frame.num_channels) + wf.setframerate(frame.sample_rate) + wf.writeframes(frame.audio) + wav_frame = OutputAudioRawFrame( + buffer.getvalue(), + sample_rate=frame.sample_rate, + num_channels=frame.num_channels, + ) + frame = wav_frame + + await self._write_frame(frame) + + # Simulate audio playback with a sleep. + await self._write_audio_sleep() + + async def _write_frame(self, frame: Frame): + """Serialize and send a frame through the WebSocket.""" + if not self._params.serializer: + return + + try: + payload = await self._params.serializer.serialize(frame) + if payload: + await self._client.send(payload) + except Exception as e: + logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})") + + async def _write_audio_sleep(self): + """Simulate audio playback timing with appropriate delays.""" + # Simulate a clock. 
+ current_time = time.monotonic() + sleep_duration = max(0, self._next_send_time - current_time) + await asyncio.sleep(sleep_duration) + if sleep_duration == 0: + self._next_send_time = time.monotonic() + self._send_interval + else: + self._next_send_time += self._send_interval + + +class FastAPIWebsocketTransport(BaseTransport): + """FastAPI WebSocket transport for real-time audio/video streaming. + + Provides bidirectional WebSocket communication with frame serialization, + session management, and event handling for client connections and timeouts. + """ + + def __init__( + self, + websocket: WebSocket, + params: FastAPIWebsocketParams, + input_name: Optional[str] = None, + output_name: Optional[str] = None, + ): + """Initialize the FastAPI WebSocket transport. + + Args: + websocket: The FastAPI WebSocket connection. + params: Transport configuration parameters. + input_name: Optional name for the input processor. + output_name: Optional name for the output processor. + """ + super().__init__(input_name=input_name, output_name=output_name) + + self._params = params + + self._callbacks = FastAPIWebsocketCallbacks( + on_client_connected=self._on_client_connected, + on_client_disconnected=self._on_client_disconnected, + on_session_timeout=self._on_session_timeout, + ) + + is_binary = False + if self._params.serializer: + is_binary = self._params.serializer.type == FrameSerializerType.BINARY + self._client = FastAPIWebsocketClient(websocket, is_binary, self._callbacks) + + self._input = FastAPIWebsocketInputTransport( + self, self._client, self._params, name=self._input_name + ) + self._output = FastAPIWebsocketOutputTransport( + self, self._client, self._params, name=self._output_name + ) + + # Register supported handlers. The user will only be able to register + # these handlers. 
+ self._register_event_handler("on_client_connected") + self._register_event_handler("on_client_disconnected") + self._register_event_handler("on_session_timeout") + + def input(self) -> FastAPIWebsocketInputTransport: + """Get the input transport processor. + + Returns: + The WebSocket input transport instance. + """ + return self._input + + def output(self) -> FastAPIWebsocketOutputTransport: + """Get the output transport processor. + + Returns: + The WebSocket output transport instance. + """ + return self._output + + async def _on_client_connected(self, websocket): + """Handle client connected event.""" + await self._call_event_handler("on_client_connected", websocket) + + async def _on_client_disconnected(self, websocket): + """Handle client disconnected event.""" + await self._call_event_handler("on_client_disconnected", websocket) + + async def _on_session_timeout(self, websocket): + """Handle session timeout event.""" + await self._call_event_handler("on_session_timeout", websocket) diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py new file mode 100644 index 000000000..8e73fb47e --- /dev/null +++ b/src/pipecat/transports/websocket/server.py @@ -0,0 +1,500 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""WebSocket server transport implementation for Pipecat. + +This module provides WebSocket server transport functionality for real-time +audio and data streaming, including client connection management, session +handling, and frame serialization. 
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""WebSocket server transport implementation for Pipecat.

This module provides WebSocket server transport functionality for real-time
audio and data streaming, including client connection management, session
handling, and frame serialization.
"""

import asyncio
import io
import time
import wave
from typing import Awaitable, Callable, Optional

from loguru import logger
from pydantic import BaseModel

from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
    Frame,
    InputAudioRawFrame,
    OutputAudioRawFrame,
    StartFrame,
    StartInterruptionFrame,
    TransportMessageFrame,
    TransportMessageUrgentFrame,
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.serializers.base_serializer import FrameSerializer
from pipecat.transports.base_input import BaseInputTransport
from pipecat.transports.base_output import BaseOutputTransport
from pipecat.transports.base_transport import BaseTransport, TransportParams

try:
    import websockets
    from websockets.asyncio.server import serve as websocket_serve
    from websockets.protocol import State
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use websockets, you need to `pip install pipecat-ai[websocket]`.")
    # Chain the original error so the traceback shows which module is missing.
    raise Exception(f"Missing module: {e}") from e


class WebsocketServerParams(TransportParams):
    """Configuration parameters for WebSocket server transport.

    Parameters:
        add_wav_header: Whether to add WAV headers to audio frames.
        serializer: Frame serializer for message encoding/decoding.
        session_timeout: Timeout in seconds for client sessions.
    """

    add_wav_header: bool = False
    serializer: Optional[FrameSerializer] = None
    session_timeout: Optional[int] = None


class WebsocketServerCallbacks(BaseModel):
    """Callback functions for WebSocket server events.

    Parameters:
        on_client_connected: Called when a client connects to the server.
        on_client_disconnected: Called when a client disconnects from the server.
        on_session_timeout: Called when a client session times out.
        on_websocket_ready: Called when the WebSocket server is ready to accept connections.
    """

    on_client_connected: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]]
    on_client_disconnected: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]]
    on_session_timeout: Callable[[websockets.WebSocketServerProtocol], Awaitable[None]]
    on_websocket_ready: Callable[[], Awaitable[None]]


class WebsocketServerInputTransport(BaseInputTransport):
    """WebSocket server input transport for receiving client data.

    Handles incoming WebSocket connections, message processing, and client
    session management including timeout monitoring and connection lifecycle.
    """

    def __init__(
        self,
        transport: BaseTransport,
        host: str,
        port: int,
        params: WebsocketServerParams,
        callbacks: WebsocketServerCallbacks,
        **kwargs,
    ):
        """Initialize the WebSocket server input transport.

        Args:
            transport: The parent transport instance.
            host: Host address to bind the WebSocket server to.
            port: Port number to bind the WebSocket server to.
            params: WebSocket server configuration parameters.
            callbacks: Callback functions for WebSocket events.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)

        self._transport = transport
        self._host = host
        self._port = port
        self._params = params
        self._callbacks = callbacks

        self._websocket: Optional[websockets.WebSocketServerProtocol] = None

        self._server_task = None

        # This task will monitor the websocket connection periodically.
        self._monitor_task = None

        self._stop_server_event = asyncio.Event()

        # Whether we have seen a StartFrame already.
        self._initialized = False

    async def start(self, frame: StartFrame):
        """Start the WebSocket server and initialize components.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)

        if self._initialized:
            return

        self._initialized = True

        if self._params.serializer:
            await self._params.serializer.setup(frame)
        if not self._server_task:
            self._server_task = self.create_task(self._server_task_handler())
        await self.set_transport_ready(frame)

    async def stop(self, frame: EndFrame):
        """Stop the WebSocket server and cleanup resources.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        await super().stop(frame)
        # Let the server task leave its `async with` block and shut down
        # gracefully, then wait for it to finish.
        self._stop_server_event.set()
        if self._monitor_task:
            await self.cancel_task(self._monitor_task)
            self._monitor_task = None
        if self._server_task:
            await self._server_task
            self._server_task = None

    async def cancel(self, frame: CancelFrame):
        """Cancel the WebSocket server and stop all processing.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        await super().cancel(frame)
        if self._monitor_task:
            await self.cancel_task(self._monitor_task)
            self._monitor_task = None
        if self._server_task:
            await self.cancel_task(self._server_task)
            self._server_task = None

    async def cleanup(self):
        """Cleanup resources and parent transport."""
        await super().cleanup()
        await self._transport.cleanup()

    async def _server_task_handler(self):
        """Handle WebSocket server startup and client connections."""
        logger.info(f"Starting websocket server on {self._host}:{self._port}")
        async with websocket_serve(self._client_handler, self._host, self._port):
            await self._callbacks.on_websocket_ready()
            await self._stop_server_event.wait()

    async def _client_handler(self, websocket: websockets.WebSocketServerProtocol):
        """Handle individual client connections and message processing.

        Only one client is served at a time: a new connection closes the
        previous one and becomes the active connection.
        """
        logger.info(f"New client connection from {websocket.remote_address}")
        if self._websocket:
            await self._websocket.close()
            logger.warning("Only one client connected, using new connection")

        self._websocket = websocket

        # Notify
        await self._callbacks.on_client_connected(websocket)

        # Create a task to monitor the websocket connection
        # NOTE(review): the monitor task is only reset in stop()/cancel(), so a
        # second client on the same server does not appear to get its own
        # session timeout -- confirm whether that is intended.
        if not self._monitor_task and self._params.session_timeout:
            self._monitor_task = self.create_task(
                self._monitor_websocket(websocket, self._params.session_timeout)
            )

        # Handle incoming messages
        try:
            async for message in websocket:
                if not self._params.serializer:
                    continue

                frame = await self._params.serializer.deserialize(message)

                if not frame:
                    continue

                if isinstance(frame, InputAudioRawFrame):
                    await self.push_audio_frame(frame)
                else:
                    await self.push_frame(frame)
        except Exception as e:
            logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})")

        # Notify disconnection
        await self._callbacks.on_client_disconnected(websocket)

        # Close *this* handler's connection. A newer client may already have
        # replaced `self._websocket`; closing or clearing that one here would
        # drop the new client (or fail on None).
        await websocket.close()
        if self._websocket is websocket:
            self._websocket = None

        logger.info(f"Client {websocket.remote_address} disconnected")

    async def _monitor_websocket(
        self, websocket: websockets.WebSocketServerProtocol, session_timeout: int
    ):
        """Monitor WebSocket connection for session timeout."""
        try:
            await asyncio.sleep(session_timeout)
            if websocket.state is not State.CLOSED:
                await self._callbacks.on_session_timeout(websocket)
        except asyncio.CancelledError:
            logger.info(f"Monitoring task cancelled for: {websocket.remote_address}")
            raise


class WebsocketServerOutputTransport(BaseOutputTransport):
    """WebSocket server output transport for sending data to clients.

    Handles outgoing frame serialization, audio streaming with timing control,
    and client connection management for WebSocket communication.
    """

    def __init__(self, transport: BaseTransport, params: WebsocketServerParams, **kwargs):
        """Initialize the WebSocket server output transport.

        Args:
            transport: The parent transport instance.
            params: WebSocket server configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)

        self._transport = transport
        self._params = params

        self._websocket: Optional[websockets.WebSocketServerProtocol] = None

        # write_audio_frame() is called quickly, as soon as we get audio
        # (e.g. from the TTS), and since this is just a network connection we
        # would be sending it too quickly. Instead, we want to block to emulate
        # an audio device, this is what the send interval is. It will be
        # computed on StartFrame.
        self._send_interval = 0
        self._next_send_time = 0

        # Whether we have seen a StartFrame already.
        self._initialized = False

    async def set_client_connection(self, websocket: Optional[websockets.WebSocketServerProtocol]):
        """Set the active client WebSocket connection.

        Any previously active connection is closed first.

        Args:
            websocket: The WebSocket connection to set as active, or None to clear.
        """
        if self._websocket and self._websocket is not websocket:
            await self._websocket.close()
            # Only warn when a client is actually being replaced; clearing the
            # connection on a regular disconnect is not worth a warning.
            if websocket:
                logger.warning("Only one client allowed, using new connection")
        self._websocket = websocket

    async def start(self, frame: StartFrame):
        """Start the output transport and initialize components.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)

        if self._initialized:
            return

        self._initialized = True

        if self._params.serializer:
            await self._params.serializer.setup(frame)
        # Half the duration of one audio chunk, in seconds.
        self._send_interval = (self.audio_chunk_size / self.sample_rate) / 2
        await self.set_transport_ready(frame)

    async def stop(self, frame: EndFrame):
        """Stop the output transport and send final frame.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        await super().stop(frame)
        await self._write_frame(frame)

    async def cancel(self, frame: CancelFrame):
        """Cancel the output transport and send cancellation frame.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        await super().cancel(frame)
        await self._write_frame(frame)

    async def cleanup(self):
        """Cleanup resources and parent transport."""
        await super().cleanup()
        await self._transport.cleanup()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and handle interruption timing.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, StartInterruptionFrame):
            await self._write_frame(frame)
            # Restart the simulated audio clock after an interruption.
            self._next_send_time = 0

    async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
        """Send a transport message frame to the client.

        Args:
            frame: The transport message frame to send.
        """
        await self._write_frame(frame)

    async def write_audio_frame(self, frame: OutputAudioRawFrame):
        """Write an audio frame to the WebSocket client with timing control.

        Args:
            frame: The output audio frame to write.
        """
        if not self._websocket:
            return

        frame = OutputAudioRawFrame(
            audio=frame.audio,
            sample_rate=self.sample_rate,
            num_channels=self._params.audio_out_channels,
        )

        if self._params.add_wav_header:
            with io.BytesIO() as buffer:
                with wave.open(buffer, "wb") as wf:
                    wf.setsampwidth(2)
                    wf.setnchannels(frame.num_channels)
                    wf.setframerate(frame.sample_rate)
                    wf.writeframes(frame.audio)
                wav_frame = OutputAudioRawFrame(
                    buffer.getvalue(),
                    sample_rate=frame.sample_rate,
                    num_channels=frame.num_channels,
                )
                frame = wav_frame

        await self._write_frame(frame)

        # Simulate audio playback with a sleep.
        await self._write_audio_sleep()

    async def _write_frame(self, frame: Frame):
        """Serialize and send a frame to the WebSocket client."""
        if not self._params.serializer:
            return

        try:
            payload = await self._params.serializer.serialize(frame)
            if payload and self._websocket:
                await self._websocket.send(payload)
        except Exception as e:
            logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})")

    async def _write_audio_sleep(self):
        """Simulate audio device timing by sleeping between audio chunks."""
        # Simulate a clock.
        current_time = time.monotonic()
        sleep_duration = max(0, self._next_send_time - current_time)
        await asyncio.sleep(sleep_duration)
        if sleep_duration == 0:
            # We are late (or just started): re-anchor the clock to now.
            self._next_send_time = time.monotonic() + self._send_interval
        else:
            self._next_send_time += self._send_interval


class WebsocketServerTransport(BaseTransport):
    """WebSocket server transport for bidirectional real-time communication.

    Provides a complete WebSocket server implementation with separate input and
    output transports, client connection management, and event handling for
    real-time audio and data streaming applications.
    """

    def __init__(
        self,
        params: WebsocketServerParams,
        host: str = "localhost",
        port: int = 8765,
        input_name: Optional[str] = None,
        output_name: Optional[str] = None,
    ):
        """Initialize the WebSocket server transport.

        Args:
            params: WebSocket server configuration parameters.
            host: Host address to bind the server to. Defaults to "localhost".
            port: Port number to bind the server to. Defaults to 8765.
            input_name: Optional name for the input processor.
            output_name: Optional name for the output processor.
        """
        super().__init__(input_name=input_name, output_name=output_name)
        self._host = host
        self._port = port
        self._params = params

        self._callbacks = WebsocketServerCallbacks(
            on_client_connected=self._on_client_connected,
            on_client_disconnected=self._on_client_disconnected,
            on_session_timeout=self._on_session_timeout,
            on_websocket_ready=self._on_websocket_ready,
        )
        # Input and output processors are created lazily by input()/output().
        self._input: Optional[WebsocketServerInputTransport] = None
        self._output: Optional[WebsocketServerOutputTransport] = None
        self._websocket: Optional[websockets.WebSocketServerProtocol] = None

        # Register supported handlers. The user will only be able to register
        # these handlers.
        self._register_event_handler("on_client_connected")
        self._register_event_handler("on_client_disconnected")
        self._register_event_handler("on_session_timeout")
        self._register_event_handler("on_websocket_ready")

    def input(self) -> WebsocketServerInputTransport:
        """Get the input transport for receiving client data.

        Returns:
            The WebSocket server input transport instance.
        """
        if not self._input:
            self._input = WebsocketServerInputTransport(
                self, self._host, self._port, self._params, self._callbacks, name=self._input_name
            )
        return self._input

    def output(self) -> WebsocketServerOutputTransport:
        """Get the output transport for sending data to clients.

        Returns:
            The WebSocket server output transport instance.
        """
        if not self._output:
            self._output = WebsocketServerOutputTransport(
                self, self._params, name=self._output_name
            )
        return self._output

    async def _on_client_connected(self, websocket):
        """Handle client connection events."""
        if self._output:
            await self._output.set_client_connection(websocket)
            await self._call_event_handler("on_client_connected", websocket)
        else:
            logger.error("A WebsocketServerTransport output is missing in the pipeline")

    async def _on_client_disconnected(self, websocket):
        """Handle client disconnection events."""
        if self._output:
            # Only clear the output connection if it still belongs to the
            # client that went away; a newer client may already have replaced
            # it and must not be closed by the old client's disconnection.
            if self._output._websocket is websocket:
                await self._output.set_client_connection(None)
            await self._call_event_handler("on_client_disconnected", websocket)
        else:
            logger.error("A WebsocketServerTransport output is missing in the pipeline")

    async def _on_session_timeout(self, websocket):
        """Handle client session timeout events."""
        await self._call_event_handler("on_session_timeout", websocket)

    async def _on_websocket_ready(self):
        """Handle WebSocket server ready events."""
        await self._call_event_handler("on_websocket_ready")
""" import asyncio -from typing import Awaitable, Callable, Dict, List, Optional, Union +from typing import Awaitable, Callable, Dict, List, Optional import aiohttp from loguru import logger -from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection +from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection from pipecat.transports.whatsapp.api import ( WhatsAppApi, WhatsAppConnectCall,